rbtagger 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/COPYING +21 -0
  2. data/History.txt +4 -0
  3. data/LICENSE +21 -0
  4. data/License.txt +20 -0
  5. data/Manifest.txt +75 -0
  6. data/PostInstall.txt +7 -0
  7. data/README +7 -0
  8. data/README.txt +53 -0
  9. data/Rakefile +33 -0
  10. data/config/hoe.rb +74 -0
  11. data/config/requirements.rb +15 -0
  12. data/ext/rule_tagger/bool.h +38 -0
  13. data/ext/rule_tagger/darray.c +292 -0
  14. data/ext/rule_tagger/darray.h +125 -0
  15. data/ext/rule_tagger/darrayP.h +50 -0
  16. data/ext/rule_tagger/extconf.rb +14 -0
  17. data/ext/rule_tagger/lex.c +170 -0
  18. data/ext/rule_tagger/lex.h +49 -0
  19. data/ext/rule_tagger/memory.c +127 -0
  20. data/ext/rule_tagger/memory.h +20 -0
  21. data/ext/rule_tagger/rbtagger.c +252 -0
  22. data/ext/rule_tagger/registry.c +326 -0
  23. data/ext/rule_tagger/registry.h +129 -0
  24. data/ext/rule_tagger/registryP.h +46 -0
  25. data/ext/rule_tagger/ruby-compat.h +20 -0
  26. data/ext/rule_tagger/rules.c +525 -0
  27. data/ext/rule_tagger/rules.h +42 -0
  28. data/ext/rule_tagger/sysdep.h +20 -0
  29. data/ext/rule_tagger/tagger.c +110 -0
  30. data/ext/rule_tagger/tagger.h +46 -0
  31. data/ext/rule_tagger/useful.c +44 -0
  32. data/ext/rule_tagger/useful.h +51 -0
  33. data/ext/word_tagger/extconf.rb +7 -0
  34. data/ext/word_tagger/porter_stemmer.c +430 -0
  35. data/ext/word_tagger/porter_stemmer.h +19 -0
  36. data/ext/word_tagger/rtagger.cc +83 -0
  37. data/ext/word_tagger/tagger.cc +153 -0
  38. data/ext/word_tagger/tagger.h +27 -0
  39. data/ext/word_tagger/tagger.rb +8 -0
  40. data/ext/word_tagger/test/Makefile +22 -0
  41. data/ext/word_tagger/test/doc.txt +87 -0
  42. data/ext/word_tagger/test/test.cc +107 -0
  43. data/ext/word_tagger/test.rb +31 -0
  44. data/lib/brill/tagger.rb +225 -0
  45. data/lib/rbtagger/version.rb +9 -0
  46. data/lib/rbtagger.rb +6 -0
  47. data/script/console +10 -0
  48. data/script/destroy +14 -0
  49. data/script/generate +14 -0
  50. data/script/txt2html +82 -0
  51. data/setup.rb +1585 -0
  52. data/tasks/deployment.rake +34 -0
  53. data/tasks/environment.rake +7 -0
  54. data/tasks/website.rake +17 -0
  55. data/test/CONTEXTUALRULEFILE +284 -0
  56. data/test/LEXICALRULEFILE +148 -0
  57. data/test/LEXICON +93696 -0
  58. data/test/docs/doc0.txt +20 -0
  59. data/test/docs/doc1.txt +11 -0
  60. data/test/docs/doc2.txt +52 -0
  61. data/test/docs/doc3.txt +128 -0
  62. data/test/docs/doc4.txt +337 -0
  63. data/test/docs/doc5.txt +497 -0
  64. data/test/docs/doc6.txt +116 -0
  65. data/test/docs/doc7.txt +101 -0
  66. data/test/docs/doc8.txt +25 -0
  67. data/test/docs/doc9.txt +84 -0
  68. data/test/tagger_test.rb +60 -0
  69. data/test/test_helper.rb +2 -0
  70. data/tools/rakehelp.rb +113 -0
  71. data/website/index.html +113 -0
  72. data/website/index.txt +53 -0
  73. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  74. data/website/stylesheets/screen.css +138 -0
  75. data/website/template.html.erb +48 -0
  76. metadata +155 -0
@@ -0,0 +1,252 @@
1
+ /**
2
+ * Copyright (c) 2008 Todd A. Fisher
3
+ * see LICENSE
4
+ */
5
+ #include "ruby.h"
6
+ #include "tagger.h"
7
+ #include "ruby-compat.h"
8
+
9
+ static VALUE rb_Tagger;
10
+ static VALUE rb_BrillTagger;
11
+
12
+ static
13
+ VALUE BrillTagger_alloc(VALUE klass)
14
+ {
15
+ VALUE object;
16
+ TaggerContext *tc = tagger_context_new();
17
+ object = Data_Wrap_Struct( klass, NULL, tagger_context_free, tc );
18
+ return object;
19
+ }
20
+
21
+ static VALUE
22
+ BrillTagger_add_to_lexicon( VALUE self, VALUE word, VALUE tag )
23
+ {
24
+ TaggerContext *tc;
25
+ Data_Get_Struct( self, TaggerContext, tc );
26
+ tagger_context_add_to_lexicon( tc, RSTRING_PTR(word), RSTRING_PTR(tag) );
27
+ return Qnil;
28
+ }
29
+ static VALUE
30
+ BrillTagger_add_to_lexicon_tags( VALUE self, VALUE bigram )
31
+ {
32
+ TaggerContext *tc;
33
+ Data_Get_Struct( self, TaggerContext, tc );
34
+ tagger_context_add_to_lexicon_tags( tc, RSTRING_PTR(bigram) );
35
+ return Qnil;
36
+ }
37
+ static VALUE
38
+ BrillTagger_add_contextual_rule( VALUE self, VALUE rule )
39
+ {
40
+ TaggerContext *tc;
41
+ Data_Get_Struct( self, TaggerContext, tc );
42
+ tagger_context_add_contextual_rule( tc, RSTRING_PTR(rule) );
43
+ return Qnil;
44
+ }
45
+ static VALUE
46
+ BrillTagger_add_lexical_rule( VALUE self, VALUE rule )
47
+ {
48
+ TaggerContext *tc;
49
+ Data_Get_Struct( self, TaggerContext, tc );
50
+ tagger_context_add_lexical_rule( tc, RSTRING_PTR(rule) );
51
+ return Qnil;
52
+ }
53
+ static VALUE
54
+ BrillTagger_add_word_to_wordlist( VALUE self, VALUE word )
55
+ {
56
+ TaggerContext *tc;
57
+ Data_Get_Struct( self, TaggerContext, tc );
58
+ tagger_context_add_word_to_wordlist( tc, RSTRING_PTR(word) );
59
+ return Qnil;
60
+ }
61
+
62
+ static VALUE
63
+ BrillTagger_add_goodleft( VALUE self, VALUE word )
64
+ {
65
+ TaggerContext *tc;
66
+ Data_Get_Struct( self, TaggerContext, tc );
67
+ tagger_context_add_goodleft( tc, RSTRING_PTR(word) );
68
+ return Qnil;
69
+ }
70
+
71
+ static VALUE
72
+ BrillTagger_add_goodright( VALUE self, VALUE word )
73
+ {
74
+ TaggerContext *tc;
75
+ Data_Get_Struct( self, TaggerContext, tc );
76
+ tagger_context_add_goodright( tc, RSTRING_PTR(word) );
77
+ return Qnil;
78
+ }
79
+
80
+ static VALUE
81
+ BrillTagger_apply_lexical_rules( VALUE self, VALUE tokens, VALUE tags, VALUE wordlist, VALUE extrawds )
82
+ {
83
+ TaggerContext *tc;
84
+ int i = 0;
85
+ int token_length = RARRAY(tokens)->len;
86
+ int tags_length = RARRAY(tags)->len;
87
+ int rules_length;
88
+ VALUE fetched;
89
+ int EXTRAWDS = NUM2INT( extrawds );
90
+ Data_Get_Struct( self, TaggerContext, tc );
91
+
92
+ if( token_length != tags_length ){
93
+ rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
94
+ return Qnil;
95
+ }
96
+
97
+ Darray text_array = Darray_create();
98
+ Darray tag_array = Darray_create();
99
+
100
+ Darray_hint( text_array, token_length, token_length );
101
+ Darray_hint( tag_array, token_length, token_length );
102
+
103
+ for( i = 0; i < token_length; ++i ){
104
+ fetched = rb_ary_entry(tokens,i);
105
+ if( fetched == Qnil ){
106
+ fprintf(stderr, "token missing %d of %d\n", i, token_length );
107
+ rb_raise(rb_eArgError, "Token was missing unexpectedly");
108
+ return Qnil;
109
+ }
110
+ Darray_addh(text_array, (VOIDP)strdup(RSTRING_PTR(fetched)) );
111
+ fetched = rb_ary_entry(tags,i);
112
+ if( fetched == Qnil ){
113
+ fprintf(stderr, "tag missing %d of %d\n", i, token_length );
114
+ rb_raise(rb_eArgError, "Tag was missing unexpectedly");
115
+ return Qnil;
116
+ }
117
+ Darray_addh(tag_array, (VOIDP)strdup(RSTRING_PTR(fetched)) );
118
+ }
119
+ rules_length = Darray_len(tc->rule_array);
120
+ /* Apply the rules */
121
+ for( i = 0; i < rules_length; ++i ) {
122
+ apply_lexical_rule( Darray_get(tc->rule_array, i),
123
+ text_array, tag_array,
124
+ tc->lexicon_hash,
125
+ tc->wordlist_hash,
126
+ tc->bigram_hash,
127
+ EXTRAWDS );
128
+ }
129
+ /* Stuff the results back into the ruby arrays */
130
+ for( i = 0; i < token_length; ++i ) {
131
+ char *text_strref = (char*)Darray_get( text_array, i );
132
+ char *tag_strref = (char*)Darray_get( tag_array, i );
133
+
134
+ // copy into ruby space
135
+ rb_ary_store( tokens, i, rb_str_new2(text_strref) );
136
+ rb_ary_store( tags, i, rb_str_new2( tag_strref ) );
137
+
138
+ free( text_strref );
139
+ free( tag_strref );
140
+ }
141
+
142
+ Darray_destroy(text_array);
143
+ Darray_destroy(tag_array);
144
+
145
+ return Qnil;
146
+ }
147
+ static VALUE
148
+ BrillTagger_default_tag_finish( VALUE self, VALUE tokens, VALUE tags )
149
+ {
150
+ int i;
151
+ VALUE fetched, word;
152
+ char *tempstr;
153
+ int token_length = RARRAY(tokens)->len;
154
+ int tags_length = RARRAY(tags)->len;
155
+ TaggerContext *tc;
156
+
157
+ Data_Get_Struct( self, TaggerContext, tc );
158
+
159
+ if( token_length != tags_length ){
160
+ rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
161
+ return Qnil;
162
+ }
163
+
164
+ for( i = 0; i < token_length; ++i ){
165
+ fetched = rb_ary_entry(tokens,i);
166
+ if( fetched == Qnil ){
167
+ rb_raise(rb_eArgError, "Token was missing unexpectedly");
168
+ return Qnil;
169
+ }
170
+ word = fetched;
171
+
172
+ if( (tempstr = Registry_get(tc->lexicon_hash, RSTRING_PTR(word))) != NULL ){
173
+ //fetched = rb_ary_entry(tags,i);
174
+ //printf( "'%s'/%s -> %s\n", RSTRING_PTR(word), RSTRING_PTR(fetched), tempstr );
175
+ rb_ary_store( tags, i, rb_str_new2(tempstr) );
176
+ }
177
+ }
178
+ return Qnil;
179
+ }
180
+
181
+ static VALUE
182
+ BrillTagger_apply_contextual_rules( VALUE self, VALUE tokens, VALUE tags, VALUE rmove )
183
+ {
184
+ int i;
185
+ int token_length = RARRAY(tokens)->len;
186
+ int tags_length = RARRAY(tags)->len;
187
+ int rules_length;
188
+ int restrict_move = NUM2INT( rmove );
189
+ char **text_tags, **text_tokens;
190
+ VALUE fetched;
191
+ TaggerContext *tc;
192
+ Data_Get_Struct( self, TaggerContext, tc );
193
+
194
+ if( token_length != tags_length ){
195
+ rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
196
+ return Qnil;
197
+ }
198
+ if( restrict_move && Registry_entry_count( tc->lexicon_hash ) == 0 ){
199
+ rb_raise(rb_eArgError, "Must load a leicon before applying contextual rules");
200
+ return Qnil;
201
+ }
202
+
203
+ text_tags = (char**)malloc(sizeof(char*) * tags_length );
204
+ text_tokens = (char**)malloc(sizeof(char*) * token_length );
205
+
206
+ // load the tokens and tags into the char * arrays
207
+ for( i = 0; i < token_length; ++i ){
208
+ fetched = rb_ary_entry(tokens,i);
209
+ text_tokens[i] = strdup(RSTRING_PTR(fetched));
210
+ fetched = rb_ary_entry(tags,i);
211
+ text_tags[i] = strdup(RSTRING_PTR(fetched));
212
+ }
213
+
214
+ rules_length = Darray_len(tc->contextual_rule_array);
215
+ // Apply the rules
216
+ for( i = 0; i < rules_length; ++i ){
217
+ apply_contextual_rule(Darray_get(tc->contextual_rule_array, i),
218
+ text_tokens, text_tags, token_length,
219
+ restrict_move, tc->lexicon_hash, tc->lexicon_tag_hash);
220
+ }
221
+
222
+ // load the results back into ruby arrays
223
+ for( i = 0; i < token_length; ++i ){
224
+ rb_ary_store( tags, i, rb_str_new2(text_tags[i]) );
225
+ free(text_tags[i]);
226
+ free(text_tokens[i]);
227
+ }
228
+
229
+ free( text_tags );
230
+ free( text_tokens );
231
+
232
+ return Qnil;
233
+ }
234
+
235
+ void Init_rule_tagger()
236
+ {
237
+ rb_Tagger = rb_define_module( "Tagger" );
238
+ rb_BrillTagger = rb_define_class_under( rb_Tagger, "BrillTagger", rb_cObject );
239
+
240
+ rb_define_alloc_func( rb_BrillTagger, BrillTagger_alloc );
241
+
242
+ rb_define_method( rb_BrillTagger, "add_to_lexicon", BrillTagger_add_to_lexicon, 2 );
243
+ rb_define_method( rb_BrillTagger, "add_to_lexicon_tags", BrillTagger_add_to_lexicon_tags, 1 );
244
+ rb_define_method( rb_BrillTagger, "add_lexical_rule", BrillTagger_add_lexical_rule, 1 );
245
+ rb_define_method( rb_BrillTagger, "add_contextual_rule", BrillTagger_add_contextual_rule, 1 );
246
+ rb_define_method( rb_BrillTagger, "add_word_to_wordlist", BrillTagger_add_word_to_wordlist, 1 );
247
+ rb_define_method( rb_BrillTagger, "add_goodleft", BrillTagger_add_goodleft, 1 );
248
+ rb_define_method( rb_BrillTagger, "add_goodright", BrillTagger_add_goodright, 1 );
249
+ rb_define_method( rb_BrillTagger, "apply_lexical_rules", BrillTagger_apply_lexical_rules, 4 );
250
+ rb_define_method( rb_BrillTagger, "default_tag_finish", BrillTagger_default_tag_finish, 2 );
251
+ rb_define_method( rb_BrillTagger, "apply_contextual_rules", BrillTagger_apply_contextual_rules, 3 );
252
+ }
@@ -0,0 +1,326 @@
1
+ #include <stddef.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include <stdio.h>
5
+ #include <assert.h>
6
+ #include <ctype.h>
7
+
8
+ #include "sysdep.h"
9
+ #include "memory.h"
10
+ #include "bool.h"
11
+ #include "useful.h"
12
+
13
+ #include "registryP.h"
14
+
15
+ /* Creates and returns and empty directory */
16
+
17
+ Registry Registry_create(compare_func, hash_func)
18
+ Registry_CompareFunc compare_func;
19
+ Registry_HashFunc hash_func;
20
+ {
21
+ Registry_rep *temp = create();
22
+
23
+ temp->hash_table = NULL;
24
+ temp->ht_size = (unsigned int)0;
25
+ temp->comp_fun = compare_func;
26
+ temp->hash_fun = hash_func;
27
+ temp->record_count = (unsigned int)0;
28
+ Registry_size_hint(raise(temp), DEFAULT_HT_SIZE);
29
+ return raise(temp);
30
+ }
31
+
32
+ /* Deal with the expected size value. */
33
+
34
+ NORET Registry_size_hint(dir, size_hint_value)
35
+ Registry dir;
36
+ unsigned int size_hint_value;
37
+ {
38
+ int i;
39
+ if (lower(dir)->record_count != (unsigned int)0) return;
40
+ if (lower(dir)->ht_size != (unsigned int)0)
41
+ Memory_free((VOIDP)lower(dir)->hash_table);
42
+ lower(dir)->ht_size = size_hint_value;
43
+ lower(dir)->hash_table =
44
+ (RegistryRecord **)Memory_allocate(sizeof(RegistryRecord *)
45
+ * size_hint_value);
46
+ for (i=0; i < size_hint_value; ++i)
47
+ *(lower(dir)->hash_table + i) = (RegistryRecord *)NULL;
48
+ }
49
+
50
+ /* Finds a named object in a directory. Returns NULL if the named
51
+ * object is not in the directory */
52
+
53
+ VOIDP Registry_get(dir, key)
54
+ Registry dir;
55
+ CONSTVOIDP key;
56
+ {
57
+ RegistryRecord *p;
58
+ Registry_CompareFunc comp_func = lower(dir)->comp_fun;
59
+ Registry_HashFunc hash_func = lower(dir)->hash_fun;
60
+
61
+ assert(comp_func);
62
+ for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
63
+ p != NULL;
64
+ p = p->next) {
65
+ if ((*comp_func)(key, p->name) == 0)
66
+ return p->obj;
67
+ }
68
+ return NULL; /* not found */
69
+ }
70
+
71
+ /* Finds a named object in a directory and returnd the original key */
72
+ /* used to index that object. Returns NULL if the named object is */
73
+ /* not in the directory. This is useful for getting the original */
74
+ /* string used to make an entry into a registry in order to free it. */
75
+ /* In this case, a pointer to the name should be storred, then the */
76
+ /* entry should be removed using Registry_remove, then the key may be */
77
+ /* freed */
78
+
79
+ VOIDP Registry_get_original_key(dir, key)
80
+ Registry dir;
81
+ CONSTVOIDP key;
82
+ {
83
+ RegistryRecord *p;
84
+ Registry_CompareFunc comp_func = lower(dir)->comp_fun;
85
+ Registry_HashFunc hash_func = lower(dir)->hash_fun;
86
+
87
+ for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
88
+ p != NULL;
89
+ p = p->next) {
90
+ if ((*comp_func)(key, p->name) == 0)
91
+ return p->name;
92
+ }
93
+ return NULL; /* not found */
94
+ }
95
+
96
+ /* Adds a named object to a directory. Returns Bool_TRUE unless an error occurs.
97
+ * An error will occur if Registry_get(dir, name) would succeed (return
98
+ * non-NULL) */
99
+
100
+ Bool Registry_add(dir, name, obj)
101
+ Registry dir;
102
+ VOIDP name;
103
+ VOIDP obj;
104
+ {
105
+ RegistryRecord *p;
106
+ RegistryRecord **table_entry;
107
+ Registry_HashFunc hash_func = lower(dir)->hash_fun;
108
+ Registry_CompareFunc comp_func = lower(dir)->comp_fun;
109
+
110
+ table_entry = lower(dir)->hash_table + (*hash_func)(name, lower(dir)->ht_size);
111
+
112
+ for (p = *table_entry;
113
+ p != NULL;
114
+ p = p->next) {
115
+ if ((*comp_func)(name, p->name) == 0)
116
+ return Bool_FALSE;
117
+ }
118
+
119
+ p = (RegistryRecord *)Memory_allocate(sizeof(RegistryRecord));
120
+ p->next = *table_entry;
121
+ p->name = name;
122
+ p->obj = obj;
123
+ *table_entry = p;
124
+ ++(lower(dir)->record_count);
125
+ return Bool_TRUE;
126
+ }
127
+
128
+ /* Removes a named object from the directory. Returns Bool_TRUE unless an
129
+ * error occurs (Bool_FALSE if an error does occur). The object is
130
+ * not freed. It is the responsibility of the
131
+ * caller to do so if necessary.
132
+ */
133
+
134
+ Bool Registry_remove(dir, key)
135
+ Registry dir;
136
+ CONSTVOIDP key;
137
+ {
138
+ RegistryRecord *p, **prev_p;
139
+ Registry_rep *ldir = lower(dir);
140
+ Registry_CompareFunc comp_func = ldir->comp_fun;
141
+ Registry_HashFunc hash_func = lower(dir)->hash_fun;
142
+
143
+ prev_p = lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size);
144
+ while ((p = *prev_p) != NULL) {
145
+ if ((*comp_func)(key, p->name) == 0) {
146
+ *prev_p = p->next;
147
+ Memory_free((VOIDP)p);
148
+ --(ldir->record_count);
149
+ return Bool_TRUE;
150
+ }
151
+ prev_p = &(p->next);
152
+ }
153
+ return Bool_FALSE;
154
+ }
155
+
156
+ /* Replaces an association in the registry. If an association with the
157
+ * given key already exists, the value is changed to new_value, and the
158
+ * old value is returned. If no association already exists, one is added
159
+ * and NULL is returned. */
160
+
161
+ VOIDP Registry_replace_value(dir, key, new_value)
162
+ Registry dir;
163
+ VOIDP key;
164
+ VOIDP new_value;
165
+ {
166
+ RegistryRecord *p;
167
+ Registry_CompareFunc comp_func = lower(dir)->comp_fun;
168
+ Registry_HashFunc hash_func = lower(dir)->hash_fun;
169
+
170
+ VOIDP temp_obj;
171
+ for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
172
+ p != NULL;
173
+ p = p->next) {
174
+ if ((*comp_func)(key, p->name) == 0) {
175
+ temp_obj = p->obj;
176
+ p->obj = new_value;
177
+ return temp_obj;
178
+ }
179
+ }
180
+ Registry_add(dir, key, new_value);
181
+ return NULL; /* not found */
182
+ }
183
+
184
+ NORET Registry_traverse(dir, action, priv_ptr)
185
+ Registry dir;
186
+ Registry_ActionProc action;
187
+ VOIDP priv_ptr;
188
+ {
189
+ RegistryRecord *p;
190
+ int i;
191
+
192
+ for (i = 0; i < lower(dir)->ht_size; ++i)
193
+ for (p = *(lower(dir)->hash_table + i);
194
+ p != NULL;
195
+ p = p->next) {
196
+ (*action)(p->name, p->obj, priv_ptr);
197
+ }
198
+ return;
199
+ }
200
+
201
+ unsigned int Registry_entry_count(dir)
202
+ Registry dir;
203
+ {
204
+ return lower(dir)->record_count;
205
+ }
206
+
207
+ static NORET add_to_darrays(key, value, priv_ptr)
208
+ VOIDP key;
209
+ VOIDP value;
210
+ VOIDP priv_ptr;
211
+ {
212
+ struct darray_pair *dapp = (struct darray_pair *) priv_ptr;
213
+
214
+ if (dapp->key_darray != NULL)
215
+ Darray_addh(dapp->key_darray, (VOIDP)key); /* Specs forbid mods to key */
216
+ if (dapp->value_darray != NULL)
217
+ Darray_addh(dapp->value_darray, value);
218
+ }
219
+
220
+ NORET Registry_fetch_contents(dir, key_darray, value_darray)
221
+ Registry dir;
222
+ Darray key_darray, value_darray;
223
+ {
224
+ struct darray_pair dap;
225
+
226
+ assert (key_darray == NULL || Darray_len(key_darray) == 0);
227
+ assert (value_darray == NULL || Darray_len(value_darray) == 0);
228
+
229
+ dap.key_darray = key_darray;
230
+ dap.value_darray = value_darray;
231
+
232
+ Registry_traverse(dir, add_to_darrays, (VOIDP)&dap);
233
+ }
234
+
235
+ NORET Registry_destroy(dir)
236
+ Registry dir;
237
+ {
238
+ RegistryRecord *p, *next;
239
+ int i;
240
+
241
+ for (i = 0; i < lower(dir)->ht_size; ++i)
242
+ for (p = *(lower(dir)->hash_table + i);
243
+ p != NULL;
244
+ p = next) {
245
+ next = p->next;
246
+ Memory_free((VOIDP)p);
247
+ }
248
+ Memory_free((VOIDP)lower(dir)->hash_table);
249
+ destroy(lower(dir));
250
+ }
251
+
252
+ unsigned int Registry_ptrhash(ptr, htsize)
253
+ CONSTVOIDP ptr;
254
+ unsigned int htsize;
255
+ {
256
+ unsigned int uns_int_ptr = (unsigned int)ptr;
257
+
258
+ if ((int)uns_int_ptr > 0)
259
+ return htsize ? (int)uns_int_ptr % (int)htsize : 0;
260
+ else
261
+ return htsize ? uns_int_ptr % htsize : 0;
262
+ }
263
+
264
+ unsigned int Registry_strhash(strp, htsize)
265
+ CONSTVOIDP strp;
266
+ unsigned int htsize;
267
+ {
268
+ char *cp = (char *)strp;
269
+ int hash_temp = 0;
270
+
271
+ while (*cp != '\0') {
272
+ if (hash_temp < 0)
273
+ hash_temp = (hash_temp << 1) +1;
274
+ else
275
+ hash_temp = hash_temp << 1;
276
+ hash_temp ^= *cp;
277
+ ++cp;
278
+ }
279
+ return htsize ? ((unsigned int)hash_temp) % htsize : 0;
280
+ }
281
+
282
+ unsigned int Registry_strcasehash(strp, htsize)
283
+ CONSTVOIDP strp;
284
+ unsigned int htsize;
285
+ {
286
+ char *cp = (char *)strp;
287
+ int hash_temp = 0;
288
+
289
+ while (*cp != '\0') {
290
+ if (hash_temp < 0)
291
+ hash_temp = (hash_temp << 1) +1;
292
+ else
293
+ hash_temp = hash_temp << 1;
294
+ if (isalpha(*cp) && isupper(*cp))
295
+ hash_temp ^= tolower(*cp);
296
+ else
297
+ hash_temp ^= *cp;
298
+ ++cp;
299
+ }
300
+ return htsize ? ((unsigned int)hash_temp) % htsize : 0;
301
+ }
302
+
303
+ int Registry_strcmp(str1, str2)
304
+ CONSTVOIDP str1;
305
+ CONSTVOIDP str2;
306
+ {
307
+ return strcmp((char *)str1, (char *)str2);
308
+ }
309
+
310
+ int Registry_strcasecmp(str1, str2)
311
+ CONSTVOIDP str1;
312
+ CONSTVOIDP str2;
313
+ {
314
+ return strcasecmp((char *)str1, (char *)str2);
315
+ }
316
+
317
+ int Registry_ptrcmp(ptr1, ptr2)
318
+ CONSTVOIDP ptr1;
319
+ CONSTVOIDP ptr2;
320
+ {
321
+ if (ptr1==ptr2)
322
+ return 0;
323
+ else
324
+ return 1;
325
+ }
326
+
@@ -0,0 +1,129 @@
1
+ #ifndef _registry_h_
2
+ #define _registry_h_
3
+
4
+ #include "sysdep.h"
5
+ #include "bool.h"
6
+ #include "darray.h"
7
+
8
+ typedef struct Registry_st *Registry;
9
+
10
+ #ifdef __STDC__
11
+ typedef unsigned int (*Registry_HashFunc)(CONSTVOIDP, unsigned int);
12
+ typedef int (*Registry_CompareFunc)(CONSTVOIDP, CONSTVOIDP);
13
+ typedef NORET (*Registry_ActionProc)(VOIDP, VOIDP, VOIDP);
14
+ extern Registry Registry_create(Registry_CompareFunc, Registry_HashFunc);
15
+ extern NORET Registry_size_hint(Registry, unsigned int);
16
+ extern Bool Registry_add(Registry, VOIDP, VOIDP);
17
+ extern Bool Registry_remove(Registry, CONSTVOIDP);
18
+ extern VOIDP Registry_get(Registry, CONSTVOIDP);
19
+ extern VOIDP Registry_get_original_key(Registry, CONSTVOIDP);
20
+ extern VOIDP Registry_replace_value(Registry, VOIDP, VOIDP);
21
+ extern NORET Registry_traverse(Registry, Registry_ActionProc, VOIDP);
22
+ extern unsigned int Registry_entry_count(Registry);
23
+ extern NORET Registry_fetch_contents(Registry, Darray, Darray);
24
+ extern NORET Registry_destroy(Registry);
25
+ extern int Registry_ptrcmp(CONSTVOIDP, CONSTVOIDP);
26
+ extern unsigned int Registry_ptrhash(CONSTVOIDP, unsigned int);
27
+ extern int Registry_strcmp(CONSTVOIDP, CONSTVOIDP);
28
+ extern unsigned int Registry_strhash(CONSTVOIDP, unsigned int);
29
+ extern int Registry_strcasecmp(CONSTVOIDP, CONSTVOIDP);
30
+ extern unsigned int Registry_strcasehash(CONSTVOIDP, unsigned int);
31
+ #else
32
+ typedef unsigned int (*Registry_HashFunc)();
33
+ typedef int (*Registry_CompareFunc)();
34
+ typedef void (*Registry_ActionProc)();
35
+ extern Registry Registry_create();
36
+ extern NORET Registry_size_hint();
37
+ extern int Registry_add();
38
+ extern int Registry_remove();
39
+ extern VOIDP Registry_get();
40
+ extern VOIDP Registry_get_original_key();
41
+ extern VOIDP Registry_replace_value();
42
+ extern NORET Registry_traverse();
43
+ extern unsigned int Registry_entry_count();
44
+ extern NORET Registry_fetch_contents();
45
+ extern NORET Registry_destroy();
46
+ extern int Registry_ptrcmp();
47
+ extern unsigned int Registry_ptrhash();
48
+ extern int Registry_strcmp();
49
+ extern unsigned int Registry_strhash();
50
+ extern int Registry_strcasecmp();
51
+ extern unsigned int Registry_strcasehash();
52
+ #endif /* __STDC__ */
53
+
54
+
55
+ /*
56
+ * Registry_create(compare_func, hash_func)
57
+ * Creates and returns an empty registry. compare_func is used
58
+ * to compare items in the registry. It should return 0 if its
59
+ * arguments are to be considered equal. hash_func should return
60
+ * a number from 0 up to (but not including) its second argument, and should attempt
61
+ * an even distribution. If compare_func
62
+ * would return 0 for a pair of objects, hash_func should return
63
+ * the same value for those objects. For registries of abstract
64
+ * objects (pointers), Registry_ptrcmp() and Registry_ptrhash() should
65
+ * be passed as the compare_func and hash_func. Registry_strcmp and
66
+ * Registry_strhash() may be used for strings. Registry_strcasecmp and
67
+ * Registry_strcasehash() may be used for strings where case is not
68
+ * significant (case-insensitive).
69
+ *
70
+ * Registry_size_hint(registry, size_hint_value)
71
+ * The registry may operate more efficiently if this operator is called
72
+ * and size_hint is close to the maximum number of elements to be in
73
+ * the Registry, at the possible cost of additional memory use. Likely
74
+ * to be effective only on an empty registry.
75
+ *
76
+ * Registry_add(registry, key, value)
77
+ * Adds the association between key and value to the registry. Neither
78
+ * key nor value are copied, and neither may be freed before being removed
79
+ * from the registry. The key should not be modified in a way that would
80
+ * change the value of the compare_func or the hash_func until this
81
+ * association is removed from the registry.
82
+ * Will return Bool_FALSE if an association with the
83
+ * same key is already in the registry (in which case the add will not be
84
+ * performed), Bool_TRUE otherwise (on successful completion).
85
+ *
86
+ * Registry_remove(registry, key)
87
+ * Removes the association with key from the registry. Returns Bool_FALSE
88
+ * if no such association exists, Bool_TRUE otherwise (on successful
89
+ * completion)
90
+ *
91
+ * Registry_get(registry, key)
92
+ * Returns the value associated with key in the registry. Returns NULL
93
+ * if there is no such association.
94
+ *
95
+ * Registry_get_original_key(registry, key) (added by Rich Pito 7/91)
96
+ * Finds a named object in a directory and returns the original key
97
+ * used to index that object. Returns NULL if the named object is
98
+ * not in the directory. This is useful for getting the original
99
+ * string used to make an entry into a registry in order to free it.
100
+ * In this case, a pointer to the name should be stored, then the
101
+ * entry should be removed using Registry_remove, then the key may be
102
+ * freed
103
+ *
104
+ * Registry_traverse(registry, action_proc, private_pointer)
105
+ * Calls action_proc once for each entry in the registry. private_pointer
106
+ * is a VOIDP which is passed to the action_proc, but not otherwise used.
107
+ * action_proc should not modify the registry in any way. action_proc takes
108
+ * three arguments, the key, the value, and private_pointer.
109
+ *
110
+ * Registry_entry_count(registry)
111
+ * Returns the number of associations in the registry.
112
+ *
113
+ * Registry_fetch_contents(registry, key_darray, value_darray)
114
+ * Stores the contents of the registry as follows: In no particular
115
+ * order, each association is processed in turn by storing (using Darray_addh)
116
+ * the key into key_darray and the value into value_darray. Either
117
+ * or both key_darray and/or value_darray may be NULL, in which case
118
+ * the corresponding data will not be processed. Actual Darrays passed
119
+ * (not NULL) must be empty. Any objects added to key_darray must be
120
+ * treated as read-only as long as they remain in the registry.
121
+ *
122
+ * Registry_destroy(registry)
123
+ * Deallocates all resources needed by the registry. Should be the last
124
+ * operation performed on the registry. Does not deallocate the objects
125
+ * (keys and values) contained in the registry (this should be done after
126
+ * the registry is destroyed). Implicitly removes all associations
127
+ * from the registry.
128
+ */
129
+ #endif /* _registry_h_ */