rbtagger 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +21 -0
- data/History.txt +4 -0
- data/LICENSE +21 -0
- data/License.txt +20 -0
- data/Manifest.txt +75 -0
- data/PostInstall.txt +7 -0
- data/README +7 -0
- data/README.txt +53 -0
- data/Rakefile +33 -0
- data/config/hoe.rb +74 -0
- data/config/requirements.rb +15 -0
- data/ext/rule_tagger/bool.h +38 -0
- data/ext/rule_tagger/darray.c +292 -0
- data/ext/rule_tagger/darray.h +125 -0
- data/ext/rule_tagger/darrayP.h +50 -0
- data/ext/rule_tagger/extconf.rb +14 -0
- data/ext/rule_tagger/lex.c +170 -0
- data/ext/rule_tagger/lex.h +49 -0
- data/ext/rule_tagger/memory.c +127 -0
- data/ext/rule_tagger/memory.h +20 -0
- data/ext/rule_tagger/rbtagger.c +252 -0
- data/ext/rule_tagger/registry.c +326 -0
- data/ext/rule_tagger/registry.h +129 -0
- data/ext/rule_tagger/registryP.h +46 -0
- data/ext/rule_tagger/ruby-compat.h +20 -0
- data/ext/rule_tagger/rules.c +525 -0
- data/ext/rule_tagger/rules.h +42 -0
- data/ext/rule_tagger/sysdep.h +20 -0
- data/ext/rule_tagger/tagger.c +110 -0
- data/ext/rule_tagger/tagger.h +46 -0
- data/ext/rule_tagger/useful.c +44 -0
- data/ext/rule_tagger/useful.h +51 -0
- data/ext/word_tagger/extconf.rb +7 -0
- data/ext/word_tagger/porter_stemmer.c +430 -0
- data/ext/word_tagger/porter_stemmer.h +19 -0
- data/ext/word_tagger/rtagger.cc +83 -0
- data/ext/word_tagger/tagger.cc +153 -0
- data/ext/word_tagger/tagger.h +27 -0
- data/ext/word_tagger/tagger.rb +8 -0
- data/ext/word_tagger/test/Makefile +22 -0
- data/ext/word_tagger/test/doc.txt +87 -0
- data/ext/word_tagger/test/test.cc +107 -0
- data/ext/word_tagger/test.rb +31 -0
- data/lib/brill/tagger.rb +225 -0
- data/lib/rbtagger/version.rb +9 -0
- data/lib/rbtagger.rb +6 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +82 -0
- data/setup.rb +1585 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/website.rake +17 -0
- data/test/CONTEXTUALRULEFILE +284 -0
- data/test/LEXICALRULEFILE +148 -0
- data/test/LEXICON +93696 -0
- data/test/docs/doc0.txt +20 -0
- data/test/docs/doc1.txt +11 -0
- data/test/docs/doc2.txt +52 -0
- data/test/docs/doc3.txt +128 -0
- data/test/docs/doc4.txt +337 -0
- data/test/docs/doc5.txt +497 -0
- data/test/docs/doc6.txt +116 -0
- data/test/docs/doc7.txt +101 -0
- data/test/docs/doc8.txt +25 -0
- data/test/docs/doc9.txt +84 -0
- data/test/tagger_test.rb +60 -0
- data/test/test_helper.rb +2 -0
- data/tools/rakehelp.rb +113 -0
- data/website/index.html +113 -0
- data/website/index.txt +53 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.html.erb +48 -0
- metadata +155 -0
@@ -0,0 +1,252 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2008 Todd A. Fisher
|
3
|
+
* see LICENSE
|
4
|
+
*/
|
5
|
+
#include "ruby.h"
|
6
|
+
#include "tagger.h"
|
7
|
+
#include "ruby-compat.h"
|
8
|
+
|
9
|
+
static VALUE rb_Tagger;
|
10
|
+
static VALUE rb_BrillTagger;
|
11
|
+
|
12
|
+
static
|
13
|
+
VALUE BrillTagger_alloc(VALUE klass)
|
14
|
+
{
|
15
|
+
VALUE object;
|
16
|
+
TaggerContext *tc = tagger_context_new();
|
17
|
+
object = Data_Wrap_Struct( klass, NULL, tagger_context_free, tc );
|
18
|
+
return object;
|
19
|
+
}
|
20
|
+
|
21
|
+
static VALUE
|
22
|
+
BrillTagger_add_to_lexicon( VALUE self, VALUE word, VALUE tag )
|
23
|
+
{
|
24
|
+
TaggerContext *tc;
|
25
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
26
|
+
tagger_context_add_to_lexicon( tc, RSTRING_PTR(word), RSTRING_PTR(tag) );
|
27
|
+
return Qnil;
|
28
|
+
}
|
29
|
+
static VALUE
|
30
|
+
BrillTagger_add_to_lexicon_tags( VALUE self, VALUE bigram )
|
31
|
+
{
|
32
|
+
TaggerContext *tc;
|
33
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
34
|
+
tagger_context_add_to_lexicon_tags( tc, RSTRING_PTR(bigram) );
|
35
|
+
return Qnil;
|
36
|
+
}
|
37
|
+
static VALUE
|
38
|
+
BrillTagger_add_contextual_rule( VALUE self, VALUE rule )
|
39
|
+
{
|
40
|
+
TaggerContext *tc;
|
41
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
42
|
+
tagger_context_add_contextual_rule( tc, RSTRING_PTR(rule) );
|
43
|
+
return Qnil;
|
44
|
+
}
|
45
|
+
static VALUE
|
46
|
+
BrillTagger_add_lexical_rule( VALUE self, VALUE rule )
|
47
|
+
{
|
48
|
+
TaggerContext *tc;
|
49
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
50
|
+
tagger_context_add_lexical_rule( tc, RSTRING_PTR(rule) );
|
51
|
+
return Qnil;
|
52
|
+
}
|
53
|
+
static VALUE
|
54
|
+
BrillTagger_add_word_to_wordlist( VALUE self, VALUE word )
|
55
|
+
{
|
56
|
+
TaggerContext *tc;
|
57
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
58
|
+
tagger_context_add_word_to_wordlist( tc, RSTRING_PTR(word) );
|
59
|
+
return Qnil;
|
60
|
+
}
|
61
|
+
|
62
|
+
static VALUE
|
63
|
+
BrillTagger_add_goodleft( VALUE self, VALUE word )
|
64
|
+
{
|
65
|
+
TaggerContext *tc;
|
66
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
67
|
+
tagger_context_add_goodleft( tc, RSTRING_PTR(word) );
|
68
|
+
return Qnil;
|
69
|
+
}
|
70
|
+
|
71
|
+
static VALUE
|
72
|
+
BrillTagger_add_goodright( VALUE self, VALUE word )
|
73
|
+
{
|
74
|
+
TaggerContext *tc;
|
75
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
76
|
+
tagger_context_add_goodright( tc, RSTRING_PTR(word) );
|
77
|
+
return Qnil;
|
78
|
+
}
|
79
|
+
|
80
|
+
static VALUE
|
81
|
+
BrillTagger_apply_lexical_rules( VALUE self, VALUE tokens, VALUE tags, VALUE wordlist, VALUE extrawds )
|
82
|
+
{
|
83
|
+
TaggerContext *tc;
|
84
|
+
int i = 0;
|
85
|
+
int token_length = RARRAY(tokens)->len;
|
86
|
+
int tags_length = RARRAY(tags)->len;
|
87
|
+
int rules_length;
|
88
|
+
VALUE fetched;
|
89
|
+
int EXTRAWDS = NUM2INT( extrawds );
|
90
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
91
|
+
|
92
|
+
if( token_length != tags_length ){
|
93
|
+
rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
|
94
|
+
return Qnil;
|
95
|
+
}
|
96
|
+
|
97
|
+
Darray text_array = Darray_create();
|
98
|
+
Darray tag_array = Darray_create();
|
99
|
+
|
100
|
+
Darray_hint( text_array, token_length, token_length );
|
101
|
+
Darray_hint( tag_array, token_length, token_length );
|
102
|
+
|
103
|
+
for( i = 0; i < token_length; ++i ){
|
104
|
+
fetched = rb_ary_entry(tokens,i);
|
105
|
+
if( fetched == Qnil ){
|
106
|
+
fprintf(stderr, "token missing %d of %d\n", i, token_length );
|
107
|
+
rb_raise(rb_eArgError, "Token was missing unexpectedly");
|
108
|
+
return Qnil;
|
109
|
+
}
|
110
|
+
Darray_addh(text_array, (VOIDP)strdup(RSTRING_PTR(fetched)) );
|
111
|
+
fetched = rb_ary_entry(tags,i);
|
112
|
+
if( fetched == Qnil ){
|
113
|
+
fprintf(stderr, "tag missing %d of %d\n", i, token_length );
|
114
|
+
rb_raise(rb_eArgError, "Tag was missing unexpectedly");
|
115
|
+
return Qnil;
|
116
|
+
}
|
117
|
+
Darray_addh(tag_array, (VOIDP)strdup(RSTRING_PTR(fetched)) );
|
118
|
+
}
|
119
|
+
rules_length = Darray_len(tc->rule_array);
|
120
|
+
/* Apply the rules */
|
121
|
+
for( i = 0; i < rules_length; ++i ) {
|
122
|
+
apply_lexical_rule( Darray_get(tc->rule_array, i),
|
123
|
+
text_array, tag_array,
|
124
|
+
tc->lexicon_hash,
|
125
|
+
tc->wordlist_hash,
|
126
|
+
tc->bigram_hash,
|
127
|
+
EXTRAWDS );
|
128
|
+
}
|
129
|
+
/* Stuff the results back into the ruby arrays */
|
130
|
+
for( i = 0; i < token_length; ++i ) {
|
131
|
+
char *text_strref = (char*)Darray_get( text_array, i );
|
132
|
+
char *tag_strref = (char*)Darray_get( tag_array, i );
|
133
|
+
|
134
|
+
// copy into ruby space
|
135
|
+
rb_ary_store( tokens, i, rb_str_new2(text_strref) );
|
136
|
+
rb_ary_store( tags, i, rb_str_new2( tag_strref ) );
|
137
|
+
|
138
|
+
free( text_strref );
|
139
|
+
free( tag_strref );
|
140
|
+
}
|
141
|
+
|
142
|
+
Darray_destroy(text_array);
|
143
|
+
Darray_destroy(tag_array);
|
144
|
+
|
145
|
+
return Qnil;
|
146
|
+
}
|
147
|
+
static VALUE
|
148
|
+
BrillTagger_default_tag_finish( VALUE self, VALUE tokens, VALUE tags )
|
149
|
+
{
|
150
|
+
int i;
|
151
|
+
VALUE fetched, word;
|
152
|
+
char *tempstr;
|
153
|
+
int token_length = RARRAY(tokens)->len;
|
154
|
+
int tags_length = RARRAY(tags)->len;
|
155
|
+
TaggerContext *tc;
|
156
|
+
|
157
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
158
|
+
|
159
|
+
if( token_length != tags_length ){
|
160
|
+
rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
|
161
|
+
return Qnil;
|
162
|
+
}
|
163
|
+
|
164
|
+
for( i = 0; i < token_length; ++i ){
|
165
|
+
fetched = rb_ary_entry(tokens,i);
|
166
|
+
if( fetched == Qnil ){
|
167
|
+
rb_raise(rb_eArgError, "Token was missing unexpectedly");
|
168
|
+
return Qnil;
|
169
|
+
}
|
170
|
+
word = fetched;
|
171
|
+
|
172
|
+
if( (tempstr = Registry_get(tc->lexicon_hash, RSTRING_PTR(word))) != NULL ){
|
173
|
+
//fetched = rb_ary_entry(tags,i);
|
174
|
+
//printf( "'%s'/%s -> %s\n", RSTRING_PTR(word), RSTRING_PTR(fetched), tempstr );
|
175
|
+
rb_ary_store( tags, i, rb_str_new2(tempstr) );
|
176
|
+
}
|
177
|
+
}
|
178
|
+
return Qnil;
|
179
|
+
}
|
180
|
+
|
181
|
+
static VALUE
|
182
|
+
BrillTagger_apply_contextual_rules( VALUE self, VALUE tokens, VALUE tags, VALUE rmove )
|
183
|
+
{
|
184
|
+
int i;
|
185
|
+
int token_length = RARRAY(tokens)->len;
|
186
|
+
int tags_length = RARRAY(tags)->len;
|
187
|
+
int rules_length;
|
188
|
+
int restrict_move = NUM2INT( rmove );
|
189
|
+
char **text_tags, **text_tokens;
|
190
|
+
VALUE fetched;
|
191
|
+
TaggerContext *tc;
|
192
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
193
|
+
|
194
|
+
if( token_length != tags_length ){
|
195
|
+
rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
|
196
|
+
return Qnil;
|
197
|
+
}
|
198
|
+
if( restrict_move && Registry_entry_count( tc->lexicon_hash ) == 0 ){
|
199
|
+
rb_raise(rb_eArgError, "Must load a leicon before applying contextual rules");
|
200
|
+
return Qnil;
|
201
|
+
}
|
202
|
+
|
203
|
+
text_tags = (char**)malloc(sizeof(char*) * tags_length );
|
204
|
+
text_tokens = (char**)malloc(sizeof(char*) * token_length );
|
205
|
+
|
206
|
+
// load the tokens and tags into the char * arrays
|
207
|
+
for( i = 0; i < token_length; ++i ){
|
208
|
+
fetched = rb_ary_entry(tokens,i);
|
209
|
+
text_tokens[i] = strdup(RSTRING_PTR(fetched));
|
210
|
+
fetched = rb_ary_entry(tags,i);
|
211
|
+
text_tags[i] = strdup(RSTRING_PTR(fetched));
|
212
|
+
}
|
213
|
+
|
214
|
+
rules_length = Darray_len(tc->contextual_rule_array);
|
215
|
+
// Apply the rules
|
216
|
+
for( i = 0; i < rules_length; ++i ){
|
217
|
+
apply_contextual_rule(Darray_get(tc->contextual_rule_array, i),
|
218
|
+
text_tokens, text_tags, token_length,
|
219
|
+
restrict_move, tc->lexicon_hash, tc->lexicon_tag_hash);
|
220
|
+
}
|
221
|
+
|
222
|
+
// load the results back into ruby arrays
|
223
|
+
for( i = 0; i < token_length; ++i ){
|
224
|
+
rb_ary_store( tags, i, rb_str_new2(text_tags[i]) );
|
225
|
+
free(text_tags[i]);
|
226
|
+
free(text_tokens[i]);
|
227
|
+
}
|
228
|
+
|
229
|
+
free( text_tags );
|
230
|
+
free( text_tokens );
|
231
|
+
|
232
|
+
return Qnil;
|
233
|
+
}
|
234
|
+
|
235
|
+
void Init_rule_tagger()
|
236
|
+
{
|
237
|
+
rb_Tagger = rb_define_module( "Tagger" );
|
238
|
+
rb_BrillTagger = rb_define_class_under( rb_Tagger, "BrillTagger", rb_cObject );
|
239
|
+
|
240
|
+
rb_define_alloc_func( rb_BrillTagger, BrillTagger_alloc );
|
241
|
+
|
242
|
+
rb_define_method( rb_BrillTagger, "add_to_lexicon", BrillTagger_add_to_lexicon, 2 );
|
243
|
+
rb_define_method( rb_BrillTagger, "add_to_lexicon_tags", BrillTagger_add_to_lexicon_tags, 1 );
|
244
|
+
rb_define_method( rb_BrillTagger, "add_lexical_rule", BrillTagger_add_lexical_rule, 1 );
|
245
|
+
rb_define_method( rb_BrillTagger, "add_contextual_rule", BrillTagger_add_contextual_rule, 1 );
|
246
|
+
rb_define_method( rb_BrillTagger, "add_word_to_wordlist", BrillTagger_add_word_to_wordlist, 1 );
|
247
|
+
rb_define_method( rb_BrillTagger, "add_goodleft", BrillTagger_add_goodleft, 1 );
|
248
|
+
rb_define_method( rb_BrillTagger, "add_goodright", BrillTagger_add_goodright, 1 );
|
249
|
+
rb_define_method( rb_BrillTagger, "apply_lexical_rules", BrillTagger_apply_lexical_rules, 4 );
|
250
|
+
rb_define_method( rb_BrillTagger, "default_tag_finish", BrillTagger_default_tag_finish, 2 );
|
251
|
+
rb_define_method( rb_BrillTagger, "apply_contextual_rules", BrillTagger_apply_contextual_rules, 3 );
|
252
|
+
}
|
@@ -0,0 +1,326 @@
|
|
1
|
+
#include <stddef.h>
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <string.h>
|
4
|
+
#include <stdio.h>
|
5
|
+
#include <assert.h>
|
6
|
+
#include <ctype.h>
|
7
|
+
|
8
|
+
#include "sysdep.h"
|
9
|
+
#include "memory.h"
|
10
|
+
#include "bool.h"
|
11
|
+
#include "useful.h"
|
12
|
+
|
13
|
+
#include "registryP.h"
|
14
|
+
|
15
|
+
/* Creates and returns and empty directory */
|
16
|
+
|
17
|
+
Registry Registry_create(compare_func, hash_func)
|
18
|
+
Registry_CompareFunc compare_func;
|
19
|
+
Registry_HashFunc hash_func;
|
20
|
+
{
|
21
|
+
Registry_rep *temp = create();
|
22
|
+
|
23
|
+
temp->hash_table = NULL;
|
24
|
+
temp->ht_size = (unsigned int)0;
|
25
|
+
temp->comp_fun = compare_func;
|
26
|
+
temp->hash_fun = hash_func;
|
27
|
+
temp->record_count = (unsigned int)0;
|
28
|
+
Registry_size_hint(raise(temp), DEFAULT_HT_SIZE);
|
29
|
+
return raise(temp);
|
30
|
+
}
|
31
|
+
|
32
|
+
/* Deal with the expected size value. */
|
33
|
+
|
34
|
+
NORET Registry_size_hint(dir, size_hint_value)
|
35
|
+
Registry dir;
|
36
|
+
unsigned int size_hint_value;
|
37
|
+
{
|
38
|
+
int i;
|
39
|
+
if (lower(dir)->record_count != (unsigned int)0) return;
|
40
|
+
if (lower(dir)->ht_size != (unsigned int)0)
|
41
|
+
Memory_free((VOIDP)lower(dir)->hash_table);
|
42
|
+
lower(dir)->ht_size = size_hint_value;
|
43
|
+
lower(dir)->hash_table =
|
44
|
+
(RegistryRecord **)Memory_allocate(sizeof(RegistryRecord *)
|
45
|
+
* size_hint_value);
|
46
|
+
for (i=0; i < size_hint_value; ++i)
|
47
|
+
*(lower(dir)->hash_table + i) = (RegistryRecord *)NULL;
|
48
|
+
}
|
49
|
+
|
50
|
+
/* Finds a named object in a directory. Returns NULL if the named
|
51
|
+
* object is not in the directory */
|
52
|
+
|
53
|
+
VOIDP Registry_get(dir, key)
|
54
|
+
Registry dir;
|
55
|
+
CONSTVOIDP key;
|
56
|
+
{
|
57
|
+
RegistryRecord *p;
|
58
|
+
Registry_CompareFunc comp_func = lower(dir)->comp_fun;
|
59
|
+
Registry_HashFunc hash_func = lower(dir)->hash_fun;
|
60
|
+
|
61
|
+
assert(comp_func);
|
62
|
+
for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
|
63
|
+
p != NULL;
|
64
|
+
p = p->next) {
|
65
|
+
if ((*comp_func)(key, p->name) == 0)
|
66
|
+
return p->obj;
|
67
|
+
}
|
68
|
+
return NULL; /* not found */
|
69
|
+
}
|
70
|
+
|
71
|
+
/* Finds a named object in a directory and returnd the original key */
|
72
|
+
/* used to index that object. Returns NULL if the named object is */
|
73
|
+
/* not in the directory. This is useful for getting the original */
|
74
|
+
/* string used to make an entry into a registry in order to free it. */
|
75
|
+
/* In this case, a pointer to the name should be storred, then the */
|
76
|
+
/* entry should be removed using Registry_remove, then the key may be */
|
77
|
+
/* freed */
|
78
|
+
|
79
|
+
VOIDP Registry_get_original_key(dir, key)
|
80
|
+
Registry dir;
|
81
|
+
CONSTVOIDP key;
|
82
|
+
{
|
83
|
+
RegistryRecord *p;
|
84
|
+
Registry_CompareFunc comp_func = lower(dir)->comp_fun;
|
85
|
+
Registry_HashFunc hash_func = lower(dir)->hash_fun;
|
86
|
+
|
87
|
+
for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
|
88
|
+
p != NULL;
|
89
|
+
p = p->next) {
|
90
|
+
if ((*comp_func)(key, p->name) == 0)
|
91
|
+
return p->name;
|
92
|
+
}
|
93
|
+
return NULL; /* not found */
|
94
|
+
}
|
95
|
+
|
96
|
+
/* Adds a named object to a directory. Returns Bool_TRUE unless an error occurs.
|
97
|
+
* An error will occur if Registry_get(dir, name) would succeed (return
|
98
|
+
* non-NULL) */
|
99
|
+
|
100
|
+
Bool Registry_add(dir, name, obj)
|
101
|
+
Registry dir;
|
102
|
+
VOIDP name;
|
103
|
+
VOIDP obj;
|
104
|
+
{
|
105
|
+
RegistryRecord *p;
|
106
|
+
RegistryRecord **table_entry;
|
107
|
+
Registry_HashFunc hash_func = lower(dir)->hash_fun;
|
108
|
+
Registry_CompareFunc comp_func = lower(dir)->comp_fun;
|
109
|
+
|
110
|
+
table_entry = lower(dir)->hash_table + (*hash_func)(name, lower(dir)->ht_size);
|
111
|
+
|
112
|
+
for (p = *table_entry;
|
113
|
+
p != NULL;
|
114
|
+
p = p->next) {
|
115
|
+
if ((*comp_func)(name, p->name) == 0)
|
116
|
+
return Bool_FALSE;
|
117
|
+
}
|
118
|
+
|
119
|
+
p = (RegistryRecord *)Memory_allocate(sizeof(RegistryRecord));
|
120
|
+
p->next = *table_entry;
|
121
|
+
p->name = name;
|
122
|
+
p->obj = obj;
|
123
|
+
*table_entry = p;
|
124
|
+
++(lower(dir)->record_count);
|
125
|
+
return Bool_TRUE;
|
126
|
+
}
|
127
|
+
|
128
|
+
/* Removes a named object from the directory. Returns Bool_TRUE unless an
|
129
|
+
* error occurs (Bool_FALSE if an error does occur). The object is
|
130
|
+
* not freed. It is the responsibility of the
|
131
|
+
* caller to do so if necessary.
|
132
|
+
*/
|
133
|
+
|
134
|
+
Bool Registry_remove(dir, key)
|
135
|
+
Registry dir;
|
136
|
+
CONSTVOIDP key;
|
137
|
+
{
|
138
|
+
RegistryRecord *p, **prev_p;
|
139
|
+
Registry_rep *ldir = lower(dir);
|
140
|
+
Registry_CompareFunc comp_func = ldir->comp_fun;
|
141
|
+
Registry_HashFunc hash_func = lower(dir)->hash_fun;
|
142
|
+
|
143
|
+
prev_p = lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size);
|
144
|
+
while ((p = *prev_p) != NULL) {
|
145
|
+
if ((*comp_func)(key, p->name) == 0) {
|
146
|
+
*prev_p = p->next;
|
147
|
+
Memory_free((VOIDP)p);
|
148
|
+
--(ldir->record_count);
|
149
|
+
return Bool_TRUE;
|
150
|
+
}
|
151
|
+
prev_p = &(p->next);
|
152
|
+
}
|
153
|
+
return Bool_FALSE;
|
154
|
+
}
|
155
|
+
|
156
|
+
/* Replaces an association in the registry. If an association with the
|
157
|
+
* given key already exists, the value is changed to new_value, and the
|
158
|
+
* old value is returned. If no association already exists, one is added
|
159
|
+
* and NULL is returned. */
|
160
|
+
|
161
|
+
VOIDP Registry_replace_value(dir, key, new_value)
|
162
|
+
Registry dir;
|
163
|
+
VOIDP key;
|
164
|
+
VOIDP new_value;
|
165
|
+
{
|
166
|
+
RegistryRecord *p;
|
167
|
+
Registry_CompareFunc comp_func = lower(dir)->comp_fun;
|
168
|
+
Registry_HashFunc hash_func = lower(dir)->hash_fun;
|
169
|
+
|
170
|
+
VOIDP temp_obj;
|
171
|
+
for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
|
172
|
+
p != NULL;
|
173
|
+
p = p->next) {
|
174
|
+
if ((*comp_func)(key, p->name) == 0) {
|
175
|
+
temp_obj = p->obj;
|
176
|
+
p->obj = new_value;
|
177
|
+
return temp_obj;
|
178
|
+
}
|
179
|
+
}
|
180
|
+
Registry_add(dir, key, new_value);
|
181
|
+
return NULL; /* not found */
|
182
|
+
}
|
183
|
+
|
184
|
+
NORET Registry_traverse(dir, action, priv_ptr)
|
185
|
+
Registry dir;
|
186
|
+
Registry_ActionProc action;
|
187
|
+
VOIDP priv_ptr;
|
188
|
+
{
|
189
|
+
RegistryRecord *p;
|
190
|
+
int i;
|
191
|
+
|
192
|
+
for (i = 0; i < lower(dir)->ht_size; ++i)
|
193
|
+
for (p = *(lower(dir)->hash_table + i);
|
194
|
+
p != NULL;
|
195
|
+
p = p->next) {
|
196
|
+
(*action)(p->name, p->obj, priv_ptr);
|
197
|
+
}
|
198
|
+
return;
|
199
|
+
}
|
200
|
+
|
201
|
+
unsigned int Registry_entry_count(dir)
|
202
|
+
Registry dir;
|
203
|
+
{
|
204
|
+
return lower(dir)->record_count;
|
205
|
+
}
|
206
|
+
|
207
|
+
static NORET add_to_darrays(key, value, priv_ptr)
|
208
|
+
VOIDP key;
|
209
|
+
VOIDP value;
|
210
|
+
VOIDP priv_ptr;
|
211
|
+
{
|
212
|
+
struct darray_pair *dapp = (struct darray_pair *) priv_ptr;
|
213
|
+
|
214
|
+
if (dapp->key_darray != NULL)
|
215
|
+
Darray_addh(dapp->key_darray, (VOIDP)key); /* Specs forbid mods to key */
|
216
|
+
if (dapp->value_darray != NULL)
|
217
|
+
Darray_addh(dapp->value_darray, value);
|
218
|
+
}
|
219
|
+
|
220
|
+
NORET Registry_fetch_contents(dir, key_darray, value_darray)
|
221
|
+
Registry dir;
|
222
|
+
Darray key_darray, value_darray;
|
223
|
+
{
|
224
|
+
struct darray_pair dap;
|
225
|
+
|
226
|
+
assert (key_darray == NULL || Darray_len(key_darray) == 0);
|
227
|
+
assert (value_darray == NULL || Darray_len(value_darray) == 0);
|
228
|
+
|
229
|
+
dap.key_darray = key_darray;
|
230
|
+
dap.value_darray = value_darray;
|
231
|
+
|
232
|
+
Registry_traverse(dir, add_to_darrays, (VOIDP)&dap);
|
233
|
+
}
|
234
|
+
|
235
|
+
NORET Registry_destroy(dir)
|
236
|
+
Registry dir;
|
237
|
+
{
|
238
|
+
RegistryRecord *p, *next;
|
239
|
+
int i;
|
240
|
+
|
241
|
+
for (i = 0; i < lower(dir)->ht_size; ++i)
|
242
|
+
for (p = *(lower(dir)->hash_table + i);
|
243
|
+
p != NULL;
|
244
|
+
p = next) {
|
245
|
+
next = p->next;
|
246
|
+
Memory_free((VOIDP)p);
|
247
|
+
}
|
248
|
+
Memory_free((VOIDP)lower(dir)->hash_table);
|
249
|
+
destroy(lower(dir));
|
250
|
+
}
|
251
|
+
|
252
|
+
unsigned int Registry_ptrhash(ptr, htsize)
|
253
|
+
CONSTVOIDP ptr;
|
254
|
+
unsigned int htsize;
|
255
|
+
{
|
256
|
+
unsigned int uns_int_ptr = (unsigned int)ptr;
|
257
|
+
|
258
|
+
if ((int)uns_int_ptr > 0)
|
259
|
+
return htsize ? (int)uns_int_ptr % (int)htsize : 0;
|
260
|
+
else
|
261
|
+
return htsize ? uns_int_ptr % htsize : 0;
|
262
|
+
}
|
263
|
+
|
264
|
+
unsigned int Registry_strhash(strp, htsize)
|
265
|
+
CONSTVOIDP strp;
|
266
|
+
unsigned int htsize;
|
267
|
+
{
|
268
|
+
char *cp = (char *)strp;
|
269
|
+
int hash_temp = 0;
|
270
|
+
|
271
|
+
while (*cp != '\0') {
|
272
|
+
if (hash_temp < 0)
|
273
|
+
hash_temp = (hash_temp << 1) +1;
|
274
|
+
else
|
275
|
+
hash_temp = hash_temp << 1;
|
276
|
+
hash_temp ^= *cp;
|
277
|
+
++cp;
|
278
|
+
}
|
279
|
+
return htsize ? ((unsigned int)hash_temp) % htsize : 0;
|
280
|
+
}
|
281
|
+
|
282
|
+
unsigned int Registry_strcasehash(strp, htsize)
|
283
|
+
CONSTVOIDP strp;
|
284
|
+
unsigned int htsize;
|
285
|
+
{
|
286
|
+
char *cp = (char *)strp;
|
287
|
+
int hash_temp = 0;
|
288
|
+
|
289
|
+
while (*cp != '\0') {
|
290
|
+
if (hash_temp < 0)
|
291
|
+
hash_temp = (hash_temp << 1) +1;
|
292
|
+
else
|
293
|
+
hash_temp = hash_temp << 1;
|
294
|
+
if (isalpha(*cp) && isupper(*cp))
|
295
|
+
hash_temp ^= tolower(*cp);
|
296
|
+
else
|
297
|
+
hash_temp ^= *cp;
|
298
|
+
++cp;
|
299
|
+
}
|
300
|
+
return htsize ? ((unsigned int)hash_temp) % htsize : 0;
|
301
|
+
}
|
302
|
+
|
303
|
+
int Registry_strcmp(str1, str2)
|
304
|
+
CONSTVOIDP str1;
|
305
|
+
CONSTVOIDP str2;
|
306
|
+
{
|
307
|
+
return strcmp((char *)str1, (char *)str2);
|
308
|
+
}
|
309
|
+
|
310
|
+
int Registry_strcasecmp(str1, str2)
|
311
|
+
CONSTVOIDP str1;
|
312
|
+
CONSTVOIDP str2;
|
313
|
+
{
|
314
|
+
return strcasecmp((char *)str1, (char *)str2);
|
315
|
+
}
|
316
|
+
|
317
|
+
int Registry_ptrcmp(ptr1, ptr2)
|
318
|
+
CONSTVOIDP ptr1;
|
319
|
+
CONSTVOIDP ptr2;
|
320
|
+
{
|
321
|
+
if (ptr1==ptr2)
|
322
|
+
return 0;
|
323
|
+
else
|
324
|
+
return 1;
|
325
|
+
}
|
326
|
+
|
@@ -0,0 +1,129 @@
|
|
1
|
+
#ifndef _registry_h_
|
2
|
+
#define _registry_h_
|
3
|
+
|
4
|
+
#include "sysdep.h"
|
5
|
+
#include "bool.h"
|
6
|
+
#include "darray.h"
|
7
|
+
|
8
|
+
typedef struct Registry_st *Registry;
|
9
|
+
|
10
|
+
#ifdef __STDC__
|
11
|
+
typedef unsigned int (*Registry_HashFunc)(CONSTVOIDP, unsigned int);
|
12
|
+
typedef int (*Registry_CompareFunc)(CONSTVOIDP, CONSTVOIDP);
|
13
|
+
typedef NORET (*Registry_ActionProc)(VOIDP, VOIDP, VOIDP);
|
14
|
+
extern Registry Registry_create(Registry_CompareFunc, Registry_HashFunc);
|
15
|
+
extern NORET Registry_size_hint(Registry, unsigned int);
|
16
|
+
extern Bool Registry_add(Registry, VOIDP, VOIDP);
|
17
|
+
extern Bool Registry_remove(Registry, CONSTVOIDP);
|
18
|
+
extern VOIDP Registry_get(Registry, CONSTVOIDP);
|
19
|
+
extern VOIDP Registry_get_original_key(Registry, CONSTVOIDP);
|
20
|
+
extern VOIDP Registry_replace_value(Registry, VOIDP, VOIDP);
|
21
|
+
extern NORET Registry_traverse(Registry, Registry_ActionProc, VOIDP);
|
22
|
+
extern unsigned int Registry_entry_count(Registry);
|
23
|
+
extern NORET Registry_fetch_contents(Registry, Darray, Darray);
|
24
|
+
extern NORET Registry_destroy(Registry);
|
25
|
+
extern int Registry_ptrcmp(CONSTVOIDP, CONSTVOIDP);
|
26
|
+
extern unsigned int Registry_ptrhash(CONSTVOIDP, unsigned int);
|
27
|
+
extern int Registry_strcmp(CONSTVOIDP, CONSTVOIDP);
|
28
|
+
extern unsigned int Registry_strhash(CONSTVOIDP, unsigned int);
|
29
|
+
extern int Registry_strcasecmp(CONSTVOIDP, CONSTVOIDP);
|
30
|
+
extern unsigned int Registry_strcasehash(CONSTVOIDP, unsigned int);
|
31
|
+
#else
|
32
|
+
typedef unsigned int (*Registry_HashFunc)();
|
33
|
+
typedef int (*Registry_CompareFunc)();
|
34
|
+
typedef void (*Registry_ActionProc)();
|
35
|
+
extern Registry Registry_create();
|
36
|
+
extern NORET Registry_size_hint();
|
37
|
+
extern int Registry_add();
|
38
|
+
extern int Registry_remove();
|
39
|
+
extern VOIDP Registry_get();
|
40
|
+
extern VOIDP Registry_get_original_key();
|
41
|
+
extern VOIDP Registry_replace_value();
|
42
|
+
extern NORET Registry_traverse();
|
43
|
+
extern unsigned int Registry_entry_count();
|
44
|
+
extern NORET Registry_fetch_contents();
|
45
|
+
extern NORET Registry_destroy();
|
46
|
+
extern int Registry_ptrcmp();
|
47
|
+
extern unsigned int Registry_ptrhash();
|
48
|
+
extern int Registry_strcmp();
|
49
|
+
extern unsigned int Registry_strhash();
|
50
|
+
extern int Registry_strcasecmp();
|
51
|
+
extern unsigned int Registry_strcasehash();
|
52
|
+
#endif /* __STDC__ */
|
53
|
+
|
54
|
+
|
55
|
+
/*
|
56
|
+
* Registry_create(compare_func, hash_func)
|
57
|
+
* Creates and returns an empty registry. compare_func is used
|
58
|
+
* to compare items in the registry. It should return 0 if its
|
59
|
+
* arguments are to be considered equal. hash_func should return
|
60
|
+
* a number between 0 and its second argument, and should attempt
|
61
|
+
* an even distribution. If compare_func
|
62
|
+
* would return 0 for a pair of objects, hash_func should return
|
63
|
+
* the same value for those objects. For registries of abstract
|
64
|
+
* objects (pointers), Registry_ptrcmp() and Registry_ptrhash() should
|
65
|
+
* be passed as the compare_func and hash_func. Registry_strcmp and
|
66
|
+
* Registry_strhash() may be used for strings. Registry_strcasecmp and
|
67
|
+
* Registry_strcasehash() may be used for strings where case is not
|
68
|
+
* significant (case-insensitive).
|
69
|
+
*
|
70
|
+
* Registry_size_hint(registry, size_hint_value)
|
71
|
+
* The registry may operate more efficiently if this operator is called
|
72
|
+
* and size_hint is close to the maximum number of elements to be in
|
73
|
+
* the Registry, at the possible cost of additional memory use. Likely
|
74
|
+
* to be effective only on an empty registry.
|
75
|
+
*
|
76
|
+
* Registry_add(registry, key, value)
|
77
|
+
* Adds the association between key and value to the registry. Neither
|
78
|
+
* key nor value are copied, and neither may be freed before being removed
|
79
|
+
* from the registry. The key should not be modified in a way that would
|
80
|
+
* change the value of the compare_func or the hash_func until this
|
81
|
+
* association is removed from the registry.
|
82
|
+
* Will return Bool_FALSE if an association with the
|
83
|
+
* same key is already in the registry (in which case the add will not be
|
84
|
+
* performed), Bool_TRUE otherwise (on successful completion).
|
85
|
+
*
|
86
|
+
* Registry_remove(registry, key)
|
87
|
+
* Removes the association with key from the registry. Returns Bool_FALSE
|
88
|
+
* if no such association exists, Bool_TRUE otherwise (on successful
|
89
|
+
* completion)
|
90
|
+
*
|
91
|
+
* Registry_get(registry, key)
|
92
|
+
* Returns the value associated with key in the registry. Returns NULL
|
93
|
+
* if there is no such association.
|
94
|
+
*
|
95
|
+
* Registry_get_original_key(registry, key) (added by Rich Pito 7/91)
|
96
|
+
* Finds a named object in a directory and returns the original key
|
97
|
+
* used to index that object. Returns NULL if the named object is
|
98
|
+
* not in the directory. This is useful for getting the original
|
99
|
+
* string used to make an entry into a registry in order to free it.
|
100
|
+
* In this case, a pointer to the name should be stored, then the
|
101
|
+
* entry should be removed using Registry_remove, then the key may be
|
102
|
+
* freed
|
103
|
+
*
|
104
|
+
* Registry_traverse(registry, action_proc, private_pointer)
|
105
|
+
* Calls action_proc once for each entry in the registry. private_pointer
|
106
|
+
* is a VOIDP which is passed to the action_proc, but not otherwise used.
|
107
|
+
* action_proc should not modify the registry in any way. action_proc takes
|
108
|
+
* three arguments, the key, the value, and private_pointer.
|
109
|
+
*
|
110
|
+
* Registry_entry_count(registry)
|
111
|
+
* Returns the number of associations in the registry.
|
112
|
+
*
|
113
|
+
* Registry_fetch_contents(registry, key_darray, value_darray)
|
114
|
+
* Stores the contents of the registry as follows: In no particular
|
115
|
+
* order, each association is processed in turn by storing (using Darray_addh)
|
116
|
+
* the key into key_darray and the value into value_darray. Either
|
117
|
+
* or both key_darray and/or value_darray may be NULL, in which case
|
118
|
+
* the corresponding data will not be processed. Actual Darrays passed
|
119
|
+
* (not NULL) must be empty. Any objects added to key_darray must be
|
120
|
+
* treated as read-only as long as they remain in the registry.
|
121
|
+
*
|
122
|
+
* Registry_destroy(registry)
|
123
|
+
* Deallocates all resources needed by the registry. Should be the last
|
124
|
+
* operation performed on the registry. Does not deallocate the objects
|
125
|
+
* (keys and values) contained in the registry (this should be done after
|
126
|
+
* the registry is destroyed). Implicitly removes all associations
|
127
|
+
* from the registry.
|
128
|
+
*/
|
129
|
+
#endif /* _resgistry_h_ */
|