ruletagger 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/COPYING +21 -0
  2. data/History.txt +4 -0
  3. data/LICENSE +21 -0
  4. data/License.txt +20 -0
  5. data/Manifest.txt +75 -0
  6. data/PostInstall.txt +7 -0
  7. data/README +7 -0
  8. data/README.txt +53 -0
  9. data/Rakefile +33 -0
  10. data/config/hoe.rb +75 -0
  11. data/config/requirements.rb +15 -0
  12. data/ext/rule_tagger/bool.h +38 -0
  13. data/ext/rule_tagger/darray.c +292 -0
  14. data/ext/rule_tagger/darray.h +125 -0
  15. data/ext/rule_tagger/darrayP.h +50 -0
  16. data/ext/rule_tagger/extconf.rb +14 -0
  17. data/ext/rule_tagger/lex.c +170 -0
  18. data/ext/rule_tagger/lex.h +49 -0
  19. data/ext/rule_tagger/memory.c +127 -0
  20. data/ext/rule_tagger/memory.h +20 -0
  21. data/ext/rule_tagger/rbtagger.c +252 -0
  22. data/ext/rule_tagger/registry.c +326 -0
  23. data/ext/rule_tagger/registry.h +129 -0
  24. data/ext/rule_tagger/registryP.h +46 -0
  25. data/ext/rule_tagger/ruby-compat.h +20 -0
  26. data/ext/rule_tagger/rules.c +525 -0
  27. data/ext/rule_tagger/rules.h +42 -0
  28. data/ext/rule_tagger/sysdep.h +20 -0
  29. data/ext/rule_tagger/tagger.c +110 -0
  30. data/ext/rule_tagger/tagger.h +46 -0
  31. data/ext/rule_tagger/useful.c +44 -0
  32. data/ext/rule_tagger/useful.h +51 -0
  33. data/ext/word_tagger/extconf.rb +7 -0
  34. data/ext/word_tagger/porter_stemmer.c +430 -0
  35. data/ext/word_tagger/porter_stemmer.h +19 -0
  36. data/ext/word_tagger/rtagger.cc +83 -0
  37. data/ext/word_tagger/tagger.cc +153 -0
  38. data/ext/word_tagger/tagger.h +27 -0
  39. data/ext/word_tagger/tagger.rb +8 -0
  40. data/ext/word_tagger/test/Makefile +22 -0
  41. data/ext/word_tagger/test/doc.txt +87 -0
  42. data/ext/word_tagger/test/test.cc +107 -0
  43. data/ext/word_tagger/test.rb +31 -0
  44. data/lib/brill/tagger.rb +225 -0
  45. data/lib/rbtagger/version.rb +9 -0
  46. data/lib/rbtagger.rb +6 -0
  47. data/script/console +10 -0
  48. data/script/destroy +14 -0
  49. data/script/generate +14 -0
  50. data/script/txt2html +82 -0
  51. data/setup.rb +1585 -0
  52. data/tasks/deployment.rake +34 -0
  53. data/tasks/environment.rake +7 -0
  54. data/tasks/website.rake +17 -0
  55. data/test/CONTEXTUALRULEFILE +284 -0
  56. data/test/LEXICALRULEFILE +148 -0
  57. data/test/LEXICON +93696 -0
  58. data/test/docs/doc0.txt +20 -0
  59. data/test/docs/doc1.txt +11 -0
  60. data/test/docs/doc2.txt +52 -0
  61. data/test/docs/doc3.txt +128 -0
  62. data/test/docs/doc4.txt +337 -0
  63. data/test/docs/doc5.txt +497 -0
  64. data/test/docs/doc6.txt +116 -0
  65. data/test/docs/doc7.txt +101 -0
  66. data/test/docs/doc8.txt +25 -0
  67. data/test/docs/doc9.txt +84 -0
  68. data/test/tagger_test.rb +60 -0
  69. data/test/test_helper.rb +2 -0
  70. data/tools/rakehelp.rb +113 -0
  71. data/website/index.html +113 -0
  72. data/website/index.txt +53 -0
  73. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  74. data/website/stylesheets/screen.css +138 -0
  75. data/website/template.html.erb +48 -0
  76. metadata +155 -0
@@ -0,0 +1,252 @@
1
+ /**
2
+ * Copyright (c) 2008 Todd A. Fisher
3
+ * see LICENSE
4
+ */
5
+ #include "ruby.h"
6
+ #include "tagger.h"
7
+ #include "ruby-compat.h"
8
+
9
+ static VALUE rb_Tagger;
10
+ static VALUE rb_BrillTagger;
11
+
12
+ static
13
+ VALUE BrillTagger_alloc(VALUE klass)
14
+ {
15
+ VALUE object;
16
+ TaggerContext *tc = tagger_context_new();
17
+ object = Data_Wrap_Struct( klass, NULL, tagger_context_free, tc );
18
+ return object;
19
+ }
20
+
21
+ static VALUE
22
+ BrillTagger_add_to_lexicon( VALUE self, VALUE word, VALUE tag )
23
+ {
24
+ TaggerContext *tc;
25
+ Data_Get_Struct( self, TaggerContext, tc );
26
+ tagger_context_add_to_lexicon( tc, RSTRING_PTR(word), RSTRING_PTR(tag) );
27
+ return Qnil;
28
+ }
29
+ static VALUE
30
+ BrillTagger_add_to_lexicon_tags( VALUE self, VALUE bigram )
31
+ {
32
+ TaggerContext *tc;
33
+ Data_Get_Struct( self, TaggerContext, tc );
34
+ tagger_context_add_to_lexicon_tags( tc, RSTRING_PTR(bigram) );
35
+ return Qnil;
36
+ }
37
+ static VALUE
38
+ BrillTagger_add_contextual_rule( VALUE self, VALUE rule )
39
+ {
40
+ TaggerContext *tc;
41
+ Data_Get_Struct( self, TaggerContext, tc );
42
+ tagger_context_add_contextual_rule( tc, RSTRING_PTR(rule) );
43
+ return Qnil;
44
+ }
45
+ static VALUE
46
+ BrillTagger_add_lexical_rule( VALUE self, VALUE rule )
47
+ {
48
+ TaggerContext *tc;
49
+ Data_Get_Struct( self, TaggerContext, tc );
50
+ tagger_context_add_lexical_rule( tc, RSTRING_PTR(rule) );
51
+ return Qnil;
52
+ }
53
+ static VALUE
54
+ BrillTagger_add_word_to_wordlist( VALUE self, VALUE word )
55
+ {
56
+ TaggerContext *tc;
57
+ Data_Get_Struct( self, TaggerContext, tc );
58
+ tagger_context_add_word_to_wordlist( tc, RSTRING_PTR(word) );
59
+ return Qnil;
60
+ }
61
+
62
+ static VALUE
63
+ BrillTagger_add_goodleft( VALUE self, VALUE word )
64
+ {
65
+ TaggerContext *tc;
66
+ Data_Get_Struct( self, TaggerContext, tc );
67
+ tagger_context_add_goodleft( tc, RSTRING_PTR(word) );
68
+ return Qnil;
69
+ }
70
+
71
+ static VALUE
72
+ BrillTagger_add_goodright( VALUE self, VALUE word )
73
+ {
74
+ TaggerContext *tc;
75
+ Data_Get_Struct( self, TaggerContext, tc );
76
+ tagger_context_add_goodright( tc, RSTRING_PTR(word) );
77
+ return Qnil;
78
+ }
79
+
80
+ static VALUE
81
+ BrillTagger_apply_lexical_rules( VALUE self, VALUE tokens, VALUE tags, VALUE wordlist, VALUE extrawds )
82
+ {
83
+ TaggerContext *tc;
84
+ int i = 0;
85
+ int token_length = RARRAY(tokens)->len;
86
+ int tags_length = RARRAY(tags)->len;
87
+ int rules_length;
88
+ VALUE fetched;
89
+ int EXTRAWDS = NUM2INT( extrawds );
90
+ Data_Get_Struct( self, TaggerContext, tc );
91
+
92
+ if( token_length != tags_length ){
93
+ rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
94
+ return Qnil;
95
+ }
96
+
97
+ Darray text_array = Darray_create();
98
+ Darray tag_array = Darray_create();
99
+
100
+ Darray_hint( text_array, token_length, token_length );
101
+ Darray_hint( tag_array, token_length, token_length );
102
+
103
+ for( i = 0; i < token_length; ++i ){
104
+ fetched = rb_ary_entry(tokens,i);
105
+ if( fetched == Qnil ){
106
+ fprintf(stderr, "token missing %d of %d\n", i, token_length );
107
+ rb_raise(rb_eArgError, "Token was missing unexpectedly");
108
+ return Qnil;
109
+ }
110
+ Darray_addh(text_array, (VOIDP)strdup(RSTRING_PTR(fetched)) );
111
+ fetched = rb_ary_entry(tags,i);
112
+ if( fetched == Qnil ){
113
+ fprintf(stderr, "tag missing %d of %d\n", i, token_length );
114
+ rb_raise(rb_eArgError, "Tag was missing unexpectedly");
115
+ return Qnil;
116
+ }
117
+ Darray_addh(tag_array, (VOIDP)strdup(RSTRING_PTR(fetched)) );
118
+ }
119
+ rules_length = Darray_len(tc->rule_array);
120
+ /* Apply the rules */
121
+ for( i = 0; i < rules_length; ++i ) {
122
+ apply_lexical_rule( Darray_get(tc->rule_array, i),
123
+ text_array, tag_array,
124
+ tc->lexicon_hash,
125
+ tc->wordlist_hash,
126
+ tc->bigram_hash,
127
+ EXTRAWDS );
128
+ }
129
+ /* Stuff the results back into the ruby arrays */
130
+ for( i = 0; i < token_length; ++i ) {
131
+ char *text_strref = (char*)Darray_get( text_array, i );
132
+ char *tag_strref = (char*)Darray_get( tag_array, i );
133
+
134
+ // copy into ruby space
135
+ rb_ary_store( tokens, i, rb_str_new2(text_strref) );
136
+ rb_ary_store( tags, i, rb_str_new2( tag_strref ) );
137
+
138
+ free( text_strref );
139
+ free( tag_strref );
140
+ }
141
+
142
+ Darray_destroy(text_array);
143
+ Darray_destroy(tag_array);
144
+
145
+ return Qnil;
146
+ }
147
+ static VALUE
148
+ BrillTagger_default_tag_finish( VALUE self, VALUE tokens, VALUE tags )
149
+ {
150
+ int i;
151
+ VALUE fetched, word;
152
+ char *tempstr;
153
+ int token_length = RARRAY(tokens)->len;
154
+ int tags_length = RARRAY(tags)->len;
155
+ TaggerContext *tc;
156
+
157
+ Data_Get_Struct( self, TaggerContext, tc );
158
+
159
+ if( token_length != tags_length ){
160
+ rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
161
+ return Qnil;
162
+ }
163
+
164
+ for( i = 0; i < token_length; ++i ){
165
+ fetched = rb_ary_entry(tokens,i);
166
+ if( fetched == Qnil ){
167
+ rb_raise(rb_eArgError, "Token was missing unexpectedly");
168
+ return Qnil;
169
+ }
170
+ word = fetched;
171
+
172
+ if( (tempstr = Registry_get(tc->lexicon_hash, RSTRING_PTR(word))) != NULL ){
173
+ //fetched = rb_ary_entry(tags,i);
174
+ //printf( "'%s'/%s -> %s\n", RSTRING_PTR(word), RSTRING_PTR(fetched), tempstr );
175
+ rb_ary_store( tags, i, rb_str_new2(tempstr) );
176
+ }
177
+ }
178
+ return Qnil;
179
+ }
180
+
181
+ static VALUE
182
+ BrillTagger_apply_contextual_rules( VALUE self, VALUE tokens, VALUE tags, VALUE rmove )
183
+ {
184
+ int i;
185
+ int token_length = RARRAY(tokens)->len;
186
+ int tags_length = RARRAY(tags)->len;
187
+ int rules_length;
188
+ int restrict_move = NUM2INT( rmove );
189
+ char **text_tags, **text_tokens;
190
+ VALUE fetched;
191
+ TaggerContext *tc;
192
+ Data_Get_Struct( self, TaggerContext, tc );
193
+
194
+ if( token_length != tags_length ){
195
+ rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
196
+ return Qnil;
197
+ }
198
+ if( restrict_move && Registry_entry_count( tc->lexicon_hash ) == 0 ){
199
+ rb_raise(rb_eArgError, "Must load a leicon before applying contextual rules");
200
+ return Qnil;
201
+ }
202
+
203
+ text_tags = (char**)malloc(sizeof(char*) * tags_length );
204
+ text_tokens = (char**)malloc(sizeof(char*) * token_length );
205
+
206
+ // load the tokens and tags into the char * arrays
207
+ for( i = 0; i < token_length; ++i ){
208
+ fetched = rb_ary_entry(tokens,i);
209
+ text_tokens[i] = strdup(RSTRING_PTR(fetched));
210
+ fetched = rb_ary_entry(tags,i);
211
+ text_tags[i] = strdup(RSTRING_PTR(fetched));
212
+ }
213
+
214
+ rules_length = Darray_len(tc->contextual_rule_array);
215
+ // Apply the rules
216
+ for( i = 0; i < rules_length; ++i ){
217
+ apply_contextual_rule(Darray_get(tc->contextual_rule_array, i),
218
+ text_tokens, text_tags, token_length,
219
+ restrict_move, tc->lexicon_hash, tc->lexicon_tag_hash);
220
+ }
221
+
222
+ // load the results back into ruby arrays
223
+ for( i = 0; i < token_length; ++i ){
224
+ rb_ary_store( tags, i, rb_str_new2(text_tags[i]) );
225
+ free(text_tags[i]);
226
+ free(text_tokens[i]);
227
+ }
228
+
229
+ free( text_tags );
230
+ free( text_tokens );
231
+
232
+ return Qnil;
233
+ }
234
+
235
+ void Init_rule_tagger()
236
+ {
237
+ rb_Tagger = rb_define_module( "Tagger" );
238
+ rb_BrillTagger = rb_define_class_under( rb_Tagger, "BrillTagger", rb_cObject );
239
+
240
+ rb_define_alloc_func( rb_BrillTagger, BrillTagger_alloc );
241
+
242
+ rb_define_method( rb_BrillTagger, "add_to_lexicon", BrillTagger_add_to_lexicon, 2 );
243
+ rb_define_method( rb_BrillTagger, "add_to_lexicon_tags", BrillTagger_add_to_lexicon_tags, 1 );
244
+ rb_define_method( rb_BrillTagger, "add_lexical_rule", BrillTagger_add_lexical_rule, 1 );
245
+ rb_define_method( rb_BrillTagger, "add_contextual_rule", BrillTagger_add_contextual_rule, 1 );
246
+ rb_define_method( rb_BrillTagger, "add_word_to_wordlist", BrillTagger_add_word_to_wordlist, 1 );
247
+ rb_define_method( rb_BrillTagger, "add_goodleft", BrillTagger_add_goodleft, 1 );
248
+ rb_define_method( rb_BrillTagger, "add_goodright", BrillTagger_add_goodright, 1 );
249
+ rb_define_method( rb_BrillTagger, "apply_lexical_rules", BrillTagger_apply_lexical_rules, 4 );
250
+ rb_define_method( rb_BrillTagger, "default_tag_finish", BrillTagger_default_tag_finish, 2 );
251
+ rb_define_method( rb_BrillTagger, "apply_contextual_rules", BrillTagger_apply_contextual_rules, 3 );
252
+ }
@@ -0,0 +1,326 @@
1
+ #include <stddef.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include <stdio.h>
5
+ #include <assert.h>
6
+ #include <ctype.h>
7
+
8
+ #include "sysdep.h"
9
+ #include "memory.h"
10
+ #include "bool.h"
11
+ #include "useful.h"
12
+
13
+ #include "registryP.h"
14
+
15
+ /* Creates and returns an empty directory */
16
+
17
+ Registry Registry_create(compare_func, hash_func)
18
+ Registry_CompareFunc compare_func;
19
+ Registry_HashFunc hash_func;
20
+ {
21
+ Registry_rep *temp = create();
22
+
23
+ temp->hash_table = NULL;
24
+ temp->ht_size = (unsigned int)0;
25
+ temp->comp_fun = compare_func;
26
+ temp->hash_fun = hash_func;
27
+ temp->record_count = (unsigned int)0;
28
+ Registry_size_hint(raise(temp), DEFAULT_HT_SIZE);
29
+ return raise(temp);
30
+ }
31
+
32
+ /* Deal with the expected size value. */
33
+
34
+ NORET Registry_size_hint(dir, size_hint_value)
35
+ Registry dir;
36
+ unsigned int size_hint_value;
37
+ {
38
+ int i;
39
+ if (lower(dir)->record_count != (unsigned int)0) return;
40
+ if (lower(dir)->ht_size != (unsigned int)0)
41
+ Memory_free((VOIDP)lower(dir)->hash_table);
42
+ lower(dir)->ht_size = size_hint_value;
43
+ lower(dir)->hash_table =
44
+ (RegistryRecord **)Memory_allocate(sizeof(RegistryRecord *)
45
+ * size_hint_value);
46
+ for (i=0; i < size_hint_value; ++i)
47
+ *(lower(dir)->hash_table + i) = (RegistryRecord *)NULL;
48
+ }
49
+
50
+ /* Finds a named object in a directory. Returns NULL if the named
51
+ * object is not in the directory */
52
+
53
+ VOIDP Registry_get(dir, key)
54
+ Registry dir;
55
+ CONSTVOIDP key;
56
+ {
57
+ RegistryRecord *p;
58
+ Registry_CompareFunc comp_func = lower(dir)->comp_fun;
59
+ Registry_HashFunc hash_func = lower(dir)->hash_fun;
60
+
61
+ assert(comp_func);
62
+ for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
63
+ p != NULL;
64
+ p = p->next) {
65
+ if ((*comp_func)(key, p->name) == 0)
66
+ return p->obj;
67
+ }
68
+ return NULL; /* not found */
69
+ }
70
+
71
+ /* Finds a named object in a directory and returns the original key */
72
+ /* used to index that object. Returns NULL if the named object is */
73
+ /* not in the directory. This is useful for getting the original */
74
+ /* string used to make an entry into a registry in order to free it. */
75
+ /* In this case, a pointer to the name should be stored, then the */
76
+ /* entry should be removed using Registry_remove, then the key may be */
77
+ /* freed */
78
+
79
+ VOIDP Registry_get_original_key(dir, key)
80
+ Registry dir;
81
+ CONSTVOIDP key;
82
+ {
83
+ RegistryRecord *p;
84
+ Registry_CompareFunc comp_func = lower(dir)->comp_fun;
85
+ Registry_HashFunc hash_func = lower(dir)->hash_fun;
86
+
87
+ for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
88
+ p != NULL;
89
+ p = p->next) {
90
+ if ((*comp_func)(key, p->name) == 0)
91
+ return p->name;
92
+ }
93
+ return NULL; /* not found */
94
+ }
95
+
96
+ /* Adds a named object to a directory. Returns Bool_TRUE unless an error occurs.
97
+ * An error will occur if Registry_get(dir, name) would succeed (return
98
+ * non-NULL) */
99
+
100
+ Bool Registry_add(dir, name, obj)
101
+ Registry dir;
102
+ VOIDP name;
103
+ VOIDP obj;
104
+ {
105
+ RegistryRecord *p;
106
+ RegistryRecord **table_entry;
107
+ Registry_HashFunc hash_func = lower(dir)->hash_fun;
108
+ Registry_CompareFunc comp_func = lower(dir)->comp_fun;
109
+
110
+ table_entry = lower(dir)->hash_table + (*hash_func)(name, lower(dir)->ht_size);
111
+
112
+ for (p = *table_entry;
113
+ p != NULL;
114
+ p = p->next) {
115
+ if ((*comp_func)(name, p->name) == 0)
116
+ return Bool_FALSE;
117
+ }
118
+
119
+ p = (RegistryRecord *)Memory_allocate(sizeof(RegistryRecord));
120
+ p->next = *table_entry;
121
+ p->name = name;
122
+ p->obj = obj;
123
+ *table_entry = p;
124
+ ++(lower(dir)->record_count);
125
+ return Bool_TRUE;
126
+ }
127
+
128
+ /* Removes a named object from the directory. Returns Bool_TRUE unless an
129
+ * error occurs (Bool_FALSE if an error does occur). The object is
130
+ * not freed. It is the responsibility of the
131
+ * caller to do so if necessary.
132
+ */
133
+
134
+ Bool Registry_remove(dir, key)
135
+ Registry dir;
136
+ CONSTVOIDP key;
137
+ {
138
+ RegistryRecord *p, **prev_p;
139
+ Registry_rep *ldir = lower(dir);
140
+ Registry_CompareFunc comp_func = ldir->comp_fun;
141
+ Registry_HashFunc hash_func = lower(dir)->hash_fun;
142
+
143
+ prev_p = lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size);
144
+ while ((p = *prev_p) != NULL) {
145
+ if ((*comp_func)(key, p->name) == 0) {
146
+ *prev_p = p->next;
147
+ Memory_free((VOIDP)p);
148
+ --(ldir->record_count);
149
+ return Bool_TRUE;
150
+ }
151
+ prev_p = &(p->next);
152
+ }
153
+ return Bool_FALSE;
154
+ }
155
+
156
+ /* Replaces an association in the registry. If an association with the
157
+ * given key already exists, the value is changed to new_value, and the
158
+ * old value is returned. If no association already exists, one is added
159
+ * and NULL is returned. */
160
+
161
+ VOIDP Registry_replace_value(dir, key, new_value)
162
+ Registry dir;
163
+ VOIDP key;
164
+ VOIDP new_value;
165
+ {
166
+ RegistryRecord *p;
167
+ Registry_CompareFunc comp_func = lower(dir)->comp_fun;
168
+ Registry_HashFunc hash_func = lower(dir)->hash_fun;
169
+
170
+ VOIDP temp_obj;
171
+ for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
172
+ p != NULL;
173
+ p = p->next) {
174
+ if ((*comp_func)(key, p->name) == 0) {
175
+ temp_obj = p->obj;
176
+ p->obj = new_value;
177
+ return temp_obj;
178
+ }
179
+ }
180
+ Registry_add(dir, key, new_value);
181
+ return NULL; /* not found */
182
+ }
183
+
184
+ NORET Registry_traverse(dir, action, priv_ptr)
185
+ Registry dir;
186
+ Registry_ActionProc action;
187
+ VOIDP priv_ptr;
188
+ {
189
+ RegistryRecord *p;
190
+ int i;
191
+
192
+ for (i = 0; i < lower(dir)->ht_size; ++i)
193
+ for (p = *(lower(dir)->hash_table + i);
194
+ p != NULL;
195
+ p = p->next) {
196
+ (*action)(p->name, p->obj, priv_ptr);
197
+ }
198
+ return;
199
+ }
200
+
201
+ unsigned int Registry_entry_count(dir)
202
+ Registry dir;
203
+ {
204
+ return lower(dir)->record_count;
205
+ }
206
+
207
+ static NORET add_to_darrays(key, value, priv_ptr)
208
+ VOIDP key;
209
+ VOIDP value;
210
+ VOIDP priv_ptr;
211
+ {
212
+ struct darray_pair *dapp = (struct darray_pair *) priv_ptr;
213
+
214
+ if (dapp->key_darray != NULL)
215
+ Darray_addh(dapp->key_darray, (VOIDP)key); /* Specs forbid mods to key */
216
+ if (dapp->value_darray != NULL)
217
+ Darray_addh(dapp->value_darray, value);
218
+ }
219
+
220
+ NORET Registry_fetch_contents(dir, key_darray, value_darray)
221
+ Registry dir;
222
+ Darray key_darray, value_darray;
223
+ {
224
+ struct darray_pair dap;
225
+
226
+ assert (key_darray == NULL || Darray_len(key_darray) == 0);
227
+ assert (value_darray == NULL || Darray_len(value_darray) == 0);
228
+
229
+ dap.key_darray = key_darray;
230
+ dap.value_darray = value_darray;
231
+
232
+ Registry_traverse(dir, add_to_darrays, (VOIDP)&dap);
233
+ }
234
+
235
+ NORET Registry_destroy(dir)
236
+ Registry dir;
237
+ {
238
+ RegistryRecord *p, *next;
239
+ int i;
240
+
241
+ for (i = 0; i < lower(dir)->ht_size; ++i)
242
+ for (p = *(lower(dir)->hash_table + i);
243
+ p != NULL;
244
+ p = next) {
245
+ next = p->next;
246
+ Memory_free((VOIDP)p);
247
+ }
248
+ Memory_free((VOIDP)lower(dir)->hash_table);
249
+ destroy(lower(dir));
250
+ }
251
+
252
+ unsigned int Registry_ptrhash(ptr, htsize)
253
+ CONSTVOIDP ptr;
254
+ unsigned int htsize;
255
+ {
256
+ unsigned int uns_int_ptr = (unsigned int)ptr;
257
+
258
+ if ((int)uns_int_ptr > 0)
259
+ return htsize ? (int)uns_int_ptr % (int)htsize : 0;
260
+ else
261
+ return htsize ? uns_int_ptr % htsize : 0;
262
+ }
263
+
264
+ unsigned int Registry_strhash(strp, htsize)
265
+ CONSTVOIDP strp;
266
+ unsigned int htsize;
267
+ {
268
+ char *cp = (char *)strp;
269
+ int hash_temp = 0;
270
+
271
+ while (*cp != '\0') {
272
+ if (hash_temp < 0)
273
+ hash_temp = (hash_temp << 1) +1;
274
+ else
275
+ hash_temp = hash_temp << 1;
276
+ hash_temp ^= *cp;
277
+ ++cp;
278
+ }
279
+ return htsize ? ((unsigned int)hash_temp) % htsize : 0;
280
+ }
281
+
282
+ unsigned int Registry_strcasehash(strp, htsize)
283
+ CONSTVOIDP strp;
284
+ unsigned int htsize;
285
+ {
286
+ char *cp = (char *)strp;
287
+ int hash_temp = 0;
288
+
289
+ while (*cp != '\0') {
290
+ if (hash_temp < 0)
291
+ hash_temp = (hash_temp << 1) +1;
292
+ else
293
+ hash_temp = hash_temp << 1;
294
+ if (isalpha(*cp) && isupper(*cp))
295
+ hash_temp ^= tolower(*cp);
296
+ else
297
+ hash_temp ^= *cp;
298
+ ++cp;
299
+ }
300
+ return htsize ? ((unsigned int)hash_temp) % htsize : 0;
301
+ }
302
+
303
+ int Registry_strcmp(str1, str2)
304
+ CONSTVOIDP str1;
305
+ CONSTVOIDP str2;
306
+ {
307
+ return strcmp((char *)str1, (char *)str2);
308
+ }
309
+
310
+ int Registry_strcasecmp(str1, str2)
311
+ CONSTVOIDP str1;
312
+ CONSTVOIDP str2;
313
+ {
314
+ return strcasecmp((char *)str1, (char *)str2);
315
+ }
316
+
317
+ int Registry_ptrcmp(ptr1, ptr2)
318
+ CONSTVOIDP ptr1;
319
+ CONSTVOIDP ptr2;
320
+ {
321
+ if (ptr1==ptr2)
322
+ return 0;
323
+ else
324
+ return 1;
325
+ }
326
+
@@ -0,0 +1,129 @@
1
+ #ifndef _registry_h_
2
+ #define _registry_h_
3
+
4
+ #include "sysdep.h"
5
+ #include "bool.h"
6
+ #include "darray.h"
7
+
8
+ typedef struct Registry_st *Registry;
9
+
10
+ #ifdef __STDC__
11
+ typedef unsigned int (*Registry_HashFunc)(CONSTVOIDP, unsigned int);
12
+ typedef int (*Registry_CompareFunc)(CONSTVOIDP, CONSTVOIDP);
13
+ typedef NORET (*Registry_ActionProc)(VOIDP, VOIDP, VOIDP);
14
+ extern Registry Registry_create(Registry_CompareFunc, Registry_HashFunc);
15
+ extern NORET Registry_size_hint(Registry, unsigned int);
16
+ extern Bool Registry_add(Registry, VOIDP, VOIDP);
17
+ extern Bool Registry_remove(Registry, CONSTVOIDP);
18
+ extern VOIDP Registry_get(Registry, CONSTVOIDP);
19
+ extern VOIDP Registry_get_original_key(Registry, CONSTVOIDP);
20
+ extern VOIDP Registry_replace_value(Registry, VOIDP, VOIDP);
21
+ extern NORET Registry_traverse(Registry, Registry_ActionProc, VOIDP);
22
+ extern unsigned int Registry_entry_count(Registry);
23
+ extern NORET Registry_fetch_contents(Registry, Darray, Darray);
24
+ extern NORET Registry_destroy(Registry);
25
+ extern int Registry_ptrcmp(CONSTVOIDP, CONSTVOIDP);
26
+ extern unsigned int Registry_ptrhash(CONSTVOIDP, unsigned int);
27
+ extern int Registry_strcmp(CONSTVOIDP, CONSTVOIDP);
28
+ extern unsigned int Registry_strhash(CONSTVOIDP, unsigned int);
29
+ extern int Registry_strcasecmp(CONSTVOIDP, CONSTVOIDP);
30
+ extern unsigned int Registry_strcasehash(CONSTVOIDP, unsigned int);
31
+ #else
32
+ typedef unsigned int (*Registry_HashFunc)();
33
+ typedef int (*Registry_CompareFunc)();
34
+ typedef void (*Registry_ActionProc)();
35
+ extern Registry Registry_create();
36
+ extern NORET Registry_size_hint();
37
+ extern int Registry_add();
38
+ extern int Registry_remove();
39
+ extern VOIDP Registry_get();
40
+ extern VOIDP Registry_get_original_key();
41
+ extern VOIDP Registry_replace_value();
42
+ extern NORET Registry_traverse();
43
+ extern unsigned int Registry_entry_count();
44
+ extern NORET Registry_fetch_contents();
45
+ extern NORET Registry_destroy();
46
+ extern int Registry_ptrcmp();
47
+ extern unsigned int Registry_ptrhash();
48
+ extern int Registry_strcmp();
49
+ extern unsigned int Registry_strhash();
50
+ extern int Registry_strcasecmp();
51
+ extern unsigned int Registry_strcasehash();
52
+ #endif /* __STDC__ */
53
+
54
+
55
+ /*
56
+ * Registry_create(compare_func, hash_func)
57
+ * Creates and returns an empty registry. compare_func is used
58
+ * to compare items in the registry. It should return 0 if its
59
+ * arguments are to be considered equal. hash_func should return
60
+ * a number between 0 and its second argument, and should attempt
61
+ * an even distribution. If compare_func
62
+ * would return 0 for a pair of objects, hash_func should return
63
+ * the same value for those objects. For registries of abstract
64
+ * objects (pointers), Registry_ptrcmp() and Registry_ptrhash() should
65
+ * be passed as the compare_func and hash_func. Registry_strcmp and
66
+ * Registry_strhash() may be used for strings. Registry_strcasecmp and
67
+ * Registry_strcasehash() may be used for strings where case is not
68
+ * significant (case-insensitive).
69
+ *
70
+ * Registry_size_hint(registry, size_hint_value)
71
+ * The registry may operate more efficiently if this operator is called
72
+ * and size_hint is close to the maximum number of elements to be in
73
+ * the Registry, at the possible cost of additional memory use. Likely
74
+ * to be effective only on an empty registry.
75
+ *
76
+ * Registry_add(registry, key, value)
77
+ * Adds the association between key and value to the registry. Neither
78
+ * key nor value are copied, and neither may be freed before being removed
79
+ * from the registry. The key should not be modified in a way that would
80
+ * change the value of the compare_func or the hash_func until this
81
+ * association is removed from the registry.
82
+ * Will return Bool_FALSE if an association with the
83
+ * same key is already in the registry (in which case the add will not be
84
+ * performed), Bool_TRUE otherwise (on successful completion).
85
+ *
86
+ * Registry_remove(registry, key)
87
+ * Removes the association with key from the registry. Returns Bool_FALSE
88
+ * if no such association exists, Bool_TRUE otherwise (on successful
89
+ * completion)
90
+ *
91
+ * Registry_get(registry, key)
92
+ * Returns the value associated with key in the registry. Returns NULL
93
+ * if there is no such association.
94
+ *
95
+ * Registry_get_original_key(registry, key) (added by Rich Pito 7/91)
96
+ * Finds a named object in a directory and returns the original key
97
+ * used to index that object. Returns NULL if the named object is
98
+ * not in the directory. This is useful for getting the original
99
+ * string used to make an entry into a registry in order to free it.
100
+ * In this case, a pointer to the name should be stored, then the
101
+ * entry should be removed using Registry_remove, then the key may be
102
+ * freed
103
+ *
104
+ * Registry_traverse(registry, action_proc, private_pointer)
105
+ * Calls action_proc once for each entry in the registry. private_pointer
106
+ * is a VOIDP which is passed to the action_proc, but not otherwise used.
107
+ * action_proc should not modify the registry in any way. action_proc takes
108
+ * three arguments, the key, the value, and private_pointer.
109
+ *
110
+ * Registry_entry_count(registry)
111
+ * Returns the number of associations in the registry.
112
+ *
113
+ * Registry_fetch_contents(registry, key_darray, value_darray)
114
+ * Stores the contents of the registry as follows: In no particular
115
+ * order, each association is processed in turn by storing (using Darray_addh)
116
+ * the key into key_darray and the value into value_darray. Either
117
+ * or both key_darray and/or value_darray may be NULL, in which case
118
+ * the corresponding data will not be processed. Actual Darrays passed
119
+ * (not NULL) must be empty. Any objects added to key_darray must be
120
+ * treated as read-only as long as they remain in the registry.
121
+ *
122
+ * Registry_destroy(registry)
123
+ * Deallocates all resources needed by the registry. Should be the last
124
+ * operation performed on the registry. Does not deallocate the objects
125
+ * (keys and values) contained in the registry (this should be done after
126
+ * the registry is destroyed). Implicitly removes all associations
127
+ * from the registry.
128
+ */
129
+ #endif /* _registry_h_ */