rbtagger 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +21 -0
- data/History.txt +4 -0
- data/LICENSE +21 -0
- data/License.txt +20 -0
- data/Manifest.txt +75 -0
- data/PostInstall.txt +7 -0
- data/README +7 -0
- data/README.txt +53 -0
- data/Rakefile +33 -0
- data/config/hoe.rb +74 -0
- data/config/requirements.rb +15 -0
- data/ext/rule_tagger/bool.h +38 -0
- data/ext/rule_tagger/darray.c +292 -0
- data/ext/rule_tagger/darray.h +125 -0
- data/ext/rule_tagger/darrayP.h +50 -0
- data/ext/rule_tagger/extconf.rb +14 -0
- data/ext/rule_tagger/lex.c +170 -0
- data/ext/rule_tagger/lex.h +49 -0
- data/ext/rule_tagger/memory.c +127 -0
- data/ext/rule_tagger/memory.h +20 -0
- data/ext/rule_tagger/rbtagger.c +252 -0
- data/ext/rule_tagger/registry.c +326 -0
- data/ext/rule_tagger/registry.h +129 -0
- data/ext/rule_tagger/registryP.h +46 -0
- data/ext/rule_tagger/ruby-compat.h +20 -0
- data/ext/rule_tagger/rules.c +525 -0
- data/ext/rule_tagger/rules.h +42 -0
- data/ext/rule_tagger/sysdep.h +20 -0
- data/ext/rule_tagger/tagger.c +110 -0
- data/ext/rule_tagger/tagger.h +46 -0
- data/ext/rule_tagger/useful.c +44 -0
- data/ext/rule_tagger/useful.h +51 -0
- data/ext/word_tagger/extconf.rb +7 -0
- data/ext/word_tagger/porter_stemmer.c +430 -0
- data/ext/word_tagger/porter_stemmer.h +19 -0
- data/ext/word_tagger/rtagger.cc +83 -0
- data/ext/word_tagger/tagger.cc +153 -0
- data/ext/word_tagger/tagger.h +27 -0
- data/ext/word_tagger/tagger.rb +8 -0
- data/ext/word_tagger/test/Makefile +22 -0
- data/ext/word_tagger/test/doc.txt +87 -0
- data/ext/word_tagger/test/test.cc +107 -0
- data/ext/word_tagger/test.rb +31 -0
- data/lib/brill/tagger.rb +225 -0
- data/lib/rbtagger/version.rb +9 -0
- data/lib/rbtagger.rb +6 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +82 -0
- data/setup.rb +1585 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/website.rake +17 -0
- data/test/CONTEXTUALRULEFILE +284 -0
- data/test/LEXICALRULEFILE +148 -0
- data/test/LEXICON +93696 -0
- data/test/docs/doc0.txt +20 -0
- data/test/docs/doc1.txt +11 -0
- data/test/docs/doc2.txt +52 -0
- data/test/docs/doc3.txt +128 -0
- data/test/docs/doc4.txt +337 -0
- data/test/docs/doc5.txt +497 -0
- data/test/docs/doc6.txt +116 -0
- data/test/docs/doc7.txt +101 -0
- data/test/docs/doc8.txt +25 -0
- data/test/docs/doc9.txt +84 -0
- data/test/tagger_test.rb +60 -0
- data/test/test_helper.rb +2 -0
- data/tools/rakehelp.rb +113 -0
- data/website/index.html +113 -0
- data/website/index.txt +53 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.html.erb +48 -0
- metadata +155 -0
@@ -0,0 +1,252 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2008 Todd A. Fisher
|
3
|
+
* see LICENSE
|
4
|
+
*/
|
5
|
+
#include "ruby.h"
|
6
|
+
#include "tagger.h"
|
7
|
+
#include "ruby-compat.h"
|
8
|
+
|
9
|
+
static VALUE rb_Tagger;
|
10
|
+
static VALUE rb_BrillTagger;
|
11
|
+
|
12
|
+
static
|
13
|
+
VALUE BrillTagger_alloc(VALUE klass)
|
14
|
+
{
|
15
|
+
VALUE object;
|
16
|
+
TaggerContext *tc = tagger_context_new();
|
17
|
+
object = Data_Wrap_Struct( klass, NULL, tagger_context_free, tc );
|
18
|
+
return object;
|
19
|
+
}
|
20
|
+
|
21
|
+
static VALUE
|
22
|
+
BrillTagger_add_to_lexicon( VALUE self, VALUE word, VALUE tag )
|
23
|
+
{
|
24
|
+
TaggerContext *tc;
|
25
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
26
|
+
tagger_context_add_to_lexicon( tc, RSTRING_PTR(word), RSTRING_PTR(tag) );
|
27
|
+
return Qnil;
|
28
|
+
}
|
29
|
+
static VALUE
|
30
|
+
BrillTagger_add_to_lexicon_tags( VALUE self, VALUE bigram )
|
31
|
+
{
|
32
|
+
TaggerContext *tc;
|
33
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
34
|
+
tagger_context_add_to_lexicon_tags( tc, RSTRING_PTR(bigram) );
|
35
|
+
return Qnil;
|
36
|
+
}
|
37
|
+
static VALUE
|
38
|
+
BrillTagger_add_contextual_rule( VALUE self, VALUE rule )
|
39
|
+
{
|
40
|
+
TaggerContext *tc;
|
41
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
42
|
+
tagger_context_add_contextual_rule( tc, RSTRING_PTR(rule) );
|
43
|
+
return Qnil;
|
44
|
+
}
|
45
|
+
static VALUE
|
46
|
+
BrillTagger_add_lexical_rule( VALUE self, VALUE rule )
|
47
|
+
{
|
48
|
+
TaggerContext *tc;
|
49
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
50
|
+
tagger_context_add_lexical_rule( tc, RSTRING_PTR(rule) );
|
51
|
+
return Qnil;
|
52
|
+
}
|
53
|
+
static VALUE
|
54
|
+
BrillTagger_add_word_to_wordlist( VALUE self, VALUE word )
|
55
|
+
{
|
56
|
+
TaggerContext *tc;
|
57
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
58
|
+
tagger_context_add_word_to_wordlist( tc, RSTRING_PTR(word) );
|
59
|
+
return Qnil;
|
60
|
+
}
|
61
|
+
|
62
|
+
static VALUE
|
63
|
+
BrillTagger_add_goodleft( VALUE self, VALUE word )
|
64
|
+
{
|
65
|
+
TaggerContext *tc;
|
66
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
67
|
+
tagger_context_add_goodleft( tc, RSTRING_PTR(word) );
|
68
|
+
return Qnil;
|
69
|
+
}
|
70
|
+
|
71
|
+
static VALUE
|
72
|
+
BrillTagger_add_goodright( VALUE self, VALUE word )
|
73
|
+
{
|
74
|
+
TaggerContext *tc;
|
75
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
76
|
+
tagger_context_add_goodright( tc, RSTRING_PTR(word) );
|
77
|
+
return Qnil;
|
78
|
+
}
|
79
|
+
|
80
|
+
static VALUE
|
81
|
+
BrillTagger_apply_lexical_rules( VALUE self, VALUE tokens, VALUE tags, VALUE wordlist, VALUE extrawds )
|
82
|
+
{
|
83
|
+
TaggerContext *tc;
|
84
|
+
int i = 0;
|
85
|
+
int token_length = RARRAY(tokens)->len;
|
86
|
+
int tags_length = RARRAY(tags)->len;
|
87
|
+
int rules_length;
|
88
|
+
VALUE fetched;
|
89
|
+
int EXTRAWDS = NUM2INT( extrawds );
|
90
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
91
|
+
|
92
|
+
if( token_length != tags_length ){
|
93
|
+
rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
|
94
|
+
return Qnil;
|
95
|
+
}
|
96
|
+
|
97
|
+
Darray text_array = Darray_create();
|
98
|
+
Darray tag_array = Darray_create();
|
99
|
+
|
100
|
+
Darray_hint( text_array, token_length, token_length );
|
101
|
+
Darray_hint( tag_array, token_length, token_length );
|
102
|
+
|
103
|
+
for( i = 0; i < token_length; ++i ){
|
104
|
+
fetched = rb_ary_entry(tokens,i);
|
105
|
+
if( fetched == Qnil ){
|
106
|
+
fprintf(stderr, "token missing %d of %d\n", i, token_length );
|
107
|
+
rb_raise(rb_eArgError, "Token was missing unexpectedly");
|
108
|
+
return Qnil;
|
109
|
+
}
|
110
|
+
Darray_addh(text_array, (VOIDP)strdup(RSTRING_PTR(fetched)) );
|
111
|
+
fetched = rb_ary_entry(tags,i);
|
112
|
+
if( fetched == Qnil ){
|
113
|
+
fprintf(stderr, "tag missing %d of %d\n", i, token_length );
|
114
|
+
rb_raise(rb_eArgError, "Tag was missing unexpectedly");
|
115
|
+
return Qnil;
|
116
|
+
}
|
117
|
+
Darray_addh(tag_array, (VOIDP)strdup(RSTRING_PTR(fetched)) );
|
118
|
+
}
|
119
|
+
rules_length = Darray_len(tc->rule_array);
|
120
|
+
/* Apply the rules */
|
121
|
+
for( i = 0; i < rules_length; ++i ) {
|
122
|
+
apply_lexical_rule( Darray_get(tc->rule_array, i),
|
123
|
+
text_array, tag_array,
|
124
|
+
tc->lexicon_hash,
|
125
|
+
tc->wordlist_hash,
|
126
|
+
tc->bigram_hash,
|
127
|
+
EXTRAWDS );
|
128
|
+
}
|
129
|
+
/* Stuff the results back into the ruby arrays */
|
130
|
+
for( i = 0; i < token_length; ++i ) {
|
131
|
+
char *text_strref = (char*)Darray_get( text_array, i );
|
132
|
+
char *tag_strref = (char*)Darray_get( tag_array, i );
|
133
|
+
|
134
|
+
// copy into ruby space
|
135
|
+
rb_ary_store( tokens, i, rb_str_new2(text_strref) );
|
136
|
+
rb_ary_store( tags, i, rb_str_new2( tag_strref ) );
|
137
|
+
|
138
|
+
free( text_strref );
|
139
|
+
free( tag_strref );
|
140
|
+
}
|
141
|
+
|
142
|
+
Darray_destroy(text_array);
|
143
|
+
Darray_destroy(tag_array);
|
144
|
+
|
145
|
+
return Qnil;
|
146
|
+
}
|
147
|
+
static VALUE
|
148
|
+
BrillTagger_default_tag_finish( VALUE self, VALUE tokens, VALUE tags )
|
149
|
+
{
|
150
|
+
int i;
|
151
|
+
VALUE fetched, word;
|
152
|
+
char *tempstr;
|
153
|
+
int token_length = RARRAY(tokens)->len;
|
154
|
+
int tags_length = RARRAY(tags)->len;
|
155
|
+
TaggerContext *tc;
|
156
|
+
|
157
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
158
|
+
|
159
|
+
if( token_length != tags_length ){
|
160
|
+
rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
|
161
|
+
return Qnil;
|
162
|
+
}
|
163
|
+
|
164
|
+
for( i = 0; i < token_length; ++i ){
|
165
|
+
fetched = rb_ary_entry(tokens,i);
|
166
|
+
if( fetched == Qnil ){
|
167
|
+
rb_raise(rb_eArgError, "Token was missing unexpectedly");
|
168
|
+
return Qnil;
|
169
|
+
}
|
170
|
+
word = fetched;
|
171
|
+
|
172
|
+
if( (tempstr = Registry_get(tc->lexicon_hash, RSTRING_PTR(word))) != NULL ){
|
173
|
+
//fetched = rb_ary_entry(tags,i);
|
174
|
+
//printf( "'%s'/%s -> %s\n", RSTRING_PTR(word), RSTRING_PTR(fetched), tempstr );
|
175
|
+
rb_ary_store( tags, i, rb_str_new2(tempstr) );
|
176
|
+
}
|
177
|
+
}
|
178
|
+
return Qnil;
|
179
|
+
}
|
180
|
+
|
181
|
+
static VALUE
|
182
|
+
BrillTagger_apply_contextual_rules( VALUE self, VALUE tokens, VALUE tags, VALUE rmove )
|
183
|
+
{
|
184
|
+
int i;
|
185
|
+
int token_length = RARRAY(tokens)->len;
|
186
|
+
int tags_length = RARRAY(tags)->len;
|
187
|
+
int rules_length;
|
188
|
+
int restrict_move = NUM2INT( rmove );
|
189
|
+
char **text_tags, **text_tokens;
|
190
|
+
VALUE fetched;
|
191
|
+
TaggerContext *tc;
|
192
|
+
Data_Get_Struct( self, TaggerContext, tc );
|
193
|
+
|
194
|
+
if( token_length != tags_length ){
|
195
|
+
rb_raise(rb_eArgError, "Error: tags and tokens must be of equal length!");
|
196
|
+
return Qnil;
|
197
|
+
}
|
198
|
+
if( restrict_move && Registry_entry_count( tc->lexicon_hash ) == 0 ){
|
199
|
+
rb_raise(rb_eArgError, "Must load a leicon before applying contextual rules");
|
200
|
+
return Qnil;
|
201
|
+
}
|
202
|
+
|
203
|
+
text_tags = (char**)malloc(sizeof(char*) * tags_length );
|
204
|
+
text_tokens = (char**)malloc(sizeof(char*) * token_length );
|
205
|
+
|
206
|
+
// load the tokens and tags into the char * arrays
|
207
|
+
for( i = 0; i < token_length; ++i ){
|
208
|
+
fetched = rb_ary_entry(tokens,i);
|
209
|
+
text_tokens[i] = strdup(RSTRING_PTR(fetched));
|
210
|
+
fetched = rb_ary_entry(tags,i);
|
211
|
+
text_tags[i] = strdup(RSTRING_PTR(fetched));
|
212
|
+
}
|
213
|
+
|
214
|
+
rules_length = Darray_len(tc->contextual_rule_array);
|
215
|
+
// Apply the rules
|
216
|
+
for( i = 0; i < rules_length; ++i ){
|
217
|
+
apply_contextual_rule(Darray_get(tc->contextual_rule_array, i),
|
218
|
+
text_tokens, text_tags, token_length,
|
219
|
+
restrict_move, tc->lexicon_hash, tc->lexicon_tag_hash);
|
220
|
+
}
|
221
|
+
|
222
|
+
// load the results back into ruby arrays
|
223
|
+
for( i = 0; i < token_length; ++i ){
|
224
|
+
rb_ary_store( tags, i, rb_str_new2(text_tags[i]) );
|
225
|
+
free(text_tags[i]);
|
226
|
+
free(text_tokens[i]);
|
227
|
+
}
|
228
|
+
|
229
|
+
free( text_tags );
|
230
|
+
free( text_tokens );
|
231
|
+
|
232
|
+
return Qnil;
|
233
|
+
}
|
234
|
+
|
235
|
+
void Init_rule_tagger()
|
236
|
+
{
|
237
|
+
rb_Tagger = rb_define_module( "Tagger" );
|
238
|
+
rb_BrillTagger = rb_define_class_under( rb_Tagger, "BrillTagger", rb_cObject );
|
239
|
+
|
240
|
+
rb_define_alloc_func( rb_BrillTagger, BrillTagger_alloc );
|
241
|
+
|
242
|
+
rb_define_method( rb_BrillTagger, "add_to_lexicon", BrillTagger_add_to_lexicon, 2 );
|
243
|
+
rb_define_method( rb_BrillTagger, "add_to_lexicon_tags", BrillTagger_add_to_lexicon_tags, 1 );
|
244
|
+
rb_define_method( rb_BrillTagger, "add_lexical_rule", BrillTagger_add_lexical_rule, 1 );
|
245
|
+
rb_define_method( rb_BrillTagger, "add_contextual_rule", BrillTagger_add_contextual_rule, 1 );
|
246
|
+
rb_define_method( rb_BrillTagger, "add_word_to_wordlist", BrillTagger_add_word_to_wordlist, 1 );
|
247
|
+
rb_define_method( rb_BrillTagger, "add_goodleft", BrillTagger_add_goodleft, 1 );
|
248
|
+
rb_define_method( rb_BrillTagger, "add_goodright", BrillTagger_add_goodright, 1 );
|
249
|
+
rb_define_method( rb_BrillTagger, "apply_lexical_rules", BrillTagger_apply_lexical_rules, 4 );
|
250
|
+
rb_define_method( rb_BrillTagger, "default_tag_finish", BrillTagger_default_tag_finish, 2 );
|
251
|
+
rb_define_method( rb_BrillTagger, "apply_contextual_rules", BrillTagger_apply_contextual_rules, 3 );
|
252
|
+
}
|
@@ -0,0 +1,326 @@
|
|
1
|
+
#include <stddef.h>
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <string.h>
|
4
|
+
#include <stdio.h>
|
5
|
+
#include <assert.h>
|
6
|
+
#include <ctype.h>
|
7
|
+
|
8
|
+
#include "sysdep.h"
|
9
|
+
#include "memory.h"
|
10
|
+
#include "bool.h"
|
11
|
+
#include "useful.h"
|
12
|
+
|
13
|
+
#include "registryP.h"
|
14
|
+
|
15
|
+
/* Creates and returns an empty directory */
|
16
|
+
|
17
|
+
Registry Registry_create(compare_func, hash_func)
|
18
|
+
Registry_CompareFunc compare_func;
|
19
|
+
Registry_HashFunc hash_func;
|
20
|
+
{
|
21
|
+
Registry_rep *temp = create();
|
22
|
+
|
23
|
+
temp->hash_table = NULL;
|
24
|
+
temp->ht_size = (unsigned int)0;
|
25
|
+
temp->comp_fun = compare_func;
|
26
|
+
temp->hash_fun = hash_func;
|
27
|
+
temp->record_count = (unsigned int)0;
|
28
|
+
Registry_size_hint(raise(temp), DEFAULT_HT_SIZE);
|
29
|
+
return raise(temp);
|
30
|
+
}
|
31
|
+
|
32
|
+
/* Deal with the expected size value. */
|
33
|
+
|
34
|
+
NORET Registry_size_hint(dir, size_hint_value)
|
35
|
+
Registry dir;
|
36
|
+
unsigned int size_hint_value;
|
37
|
+
{
|
38
|
+
int i;
|
39
|
+
if (lower(dir)->record_count != (unsigned int)0) return;
|
40
|
+
if (lower(dir)->ht_size != (unsigned int)0)
|
41
|
+
Memory_free((VOIDP)lower(dir)->hash_table);
|
42
|
+
lower(dir)->ht_size = size_hint_value;
|
43
|
+
lower(dir)->hash_table =
|
44
|
+
(RegistryRecord **)Memory_allocate(sizeof(RegistryRecord *)
|
45
|
+
* size_hint_value);
|
46
|
+
for (i=0; i < size_hint_value; ++i)
|
47
|
+
*(lower(dir)->hash_table + i) = (RegistryRecord *)NULL;
|
48
|
+
}
|
49
|
+
|
50
|
+
/* Finds a named object in a directory. Returns NULL if the named
|
51
|
+
* object is not in the directory */
|
52
|
+
|
53
|
+
VOIDP Registry_get(dir, key)
|
54
|
+
Registry dir;
|
55
|
+
CONSTVOIDP key;
|
56
|
+
{
|
57
|
+
RegistryRecord *p;
|
58
|
+
Registry_CompareFunc comp_func = lower(dir)->comp_fun;
|
59
|
+
Registry_HashFunc hash_func = lower(dir)->hash_fun;
|
60
|
+
|
61
|
+
assert(comp_func);
|
62
|
+
for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
|
63
|
+
p != NULL;
|
64
|
+
p = p->next) {
|
65
|
+
if ((*comp_func)(key, p->name) == 0)
|
66
|
+
return p->obj;
|
67
|
+
}
|
68
|
+
return NULL; /* not found */
|
69
|
+
}
|
70
|
+
|
71
|
+
/* Finds a named object in a directory and returns the original key */
|
72
|
+
/* used to index that object. Returns NULL if the named object is */
|
73
|
+
/* not in the directory. This is useful for getting the original */
|
74
|
+
/* string used to make an entry into a registry in order to free it. */
|
75
|
+
/* In this case, a pointer to the name should be stored, then the */
|
76
|
+
/* entry should be removed using Registry_remove, then the key may be */
|
77
|
+
/* freed */
|
78
|
+
|
79
|
+
VOIDP Registry_get_original_key(dir, key)
|
80
|
+
Registry dir;
|
81
|
+
CONSTVOIDP key;
|
82
|
+
{
|
83
|
+
RegistryRecord *p;
|
84
|
+
Registry_CompareFunc comp_func = lower(dir)->comp_fun;
|
85
|
+
Registry_HashFunc hash_func = lower(dir)->hash_fun;
|
86
|
+
|
87
|
+
for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
|
88
|
+
p != NULL;
|
89
|
+
p = p->next) {
|
90
|
+
if ((*comp_func)(key, p->name) == 0)
|
91
|
+
return p->name;
|
92
|
+
}
|
93
|
+
return NULL; /* not found */
|
94
|
+
}
|
95
|
+
|
96
|
+
/* Adds a named object to a directory. Returns Bool_TRUE unless an error occurs.
|
97
|
+
* An error will occur if Registry_get(dir, name) would succeed (return
|
98
|
+
* non-NULL) */
|
99
|
+
|
100
|
+
Bool Registry_add(dir, name, obj)
|
101
|
+
Registry dir;
|
102
|
+
VOIDP name;
|
103
|
+
VOIDP obj;
|
104
|
+
{
|
105
|
+
RegistryRecord *p;
|
106
|
+
RegistryRecord **table_entry;
|
107
|
+
Registry_HashFunc hash_func = lower(dir)->hash_fun;
|
108
|
+
Registry_CompareFunc comp_func = lower(dir)->comp_fun;
|
109
|
+
|
110
|
+
table_entry = lower(dir)->hash_table + (*hash_func)(name, lower(dir)->ht_size);
|
111
|
+
|
112
|
+
for (p = *table_entry;
|
113
|
+
p != NULL;
|
114
|
+
p = p->next) {
|
115
|
+
if ((*comp_func)(name, p->name) == 0)
|
116
|
+
return Bool_FALSE;
|
117
|
+
}
|
118
|
+
|
119
|
+
p = (RegistryRecord *)Memory_allocate(sizeof(RegistryRecord));
|
120
|
+
p->next = *table_entry;
|
121
|
+
p->name = name;
|
122
|
+
p->obj = obj;
|
123
|
+
*table_entry = p;
|
124
|
+
++(lower(dir)->record_count);
|
125
|
+
return Bool_TRUE;
|
126
|
+
}
|
127
|
+
|
128
|
+
/* Removes a named object from the directory. Returns Bool_TRUE unless an
|
129
|
+
* error occurs (Bool_FALSE if an error does occur). The object is
|
130
|
+
* not freed. It is the responsibility of the
|
131
|
+
* caller to do so if necessary.
|
132
|
+
*/
|
133
|
+
|
134
|
+
Bool Registry_remove(dir, key)
|
135
|
+
Registry dir;
|
136
|
+
CONSTVOIDP key;
|
137
|
+
{
|
138
|
+
RegistryRecord *p, **prev_p;
|
139
|
+
Registry_rep *ldir = lower(dir);
|
140
|
+
Registry_CompareFunc comp_func = ldir->comp_fun;
|
141
|
+
Registry_HashFunc hash_func = lower(dir)->hash_fun;
|
142
|
+
|
143
|
+
prev_p = lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size);
|
144
|
+
while ((p = *prev_p) != NULL) {
|
145
|
+
if ((*comp_func)(key, p->name) == 0) {
|
146
|
+
*prev_p = p->next;
|
147
|
+
Memory_free((VOIDP)p);
|
148
|
+
--(ldir->record_count);
|
149
|
+
return Bool_TRUE;
|
150
|
+
}
|
151
|
+
prev_p = &(p->next);
|
152
|
+
}
|
153
|
+
return Bool_FALSE;
|
154
|
+
}
|
155
|
+
|
156
|
+
/* Replaces an association in the registry. If an association with the
|
157
|
+
* given key already exists, the value is changed to new_value, and the
|
158
|
+
* old value is returned. If no association already exists, one is added
|
159
|
+
* and NULL is returned. */
|
160
|
+
|
161
|
+
VOIDP Registry_replace_value(dir, key, new_value)
|
162
|
+
Registry dir;
|
163
|
+
VOIDP key;
|
164
|
+
VOIDP new_value;
|
165
|
+
{
|
166
|
+
RegistryRecord *p;
|
167
|
+
Registry_CompareFunc comp_func = lower(dir)->comp_fun;
|
168
|
+
Registry_HashFunc hash_func = lower(dir)->hash_fun;
|
169
|
+
|
170
|
+
VOIDP temp_obj;
|
171
|
+
for (p = *(lower(dir)->hash_table + (*hash_func)(key, lower(dir)->ht_size));
|
172
|
+
p != NULL;
|
173
|
+
p = p->next) {
|
174
|
+
if ((*comp_func)(key, p->name) == 0) {
|
175
|
+
temp_obj = p->obj;
|
176
|
+
p->obj = new_value;
|
177
|
+
return temp_obj;
|
178
|
+
}
|
179
|
+
}
|
180
|
+
Registry_add(dir, key, new_value);
|
181
|
+
return NULL; /* not found */
|
182
|
+
}
|
183
|
+
|
184
|
+
NORET Registry_traverse(dir, action, priv_ptr)
|
185
|
+
Registry dir;
|
186
|
+
Registry_ActionProc action;
|
187
|
+
VOIDP priv_ptr;
|
188
|
+
{
|
189
|
+
RegistryRecord *p;
|
190
|
+
int i;
|
191
|
+
|
192
|
+
for (i = 0; i < lower(dir)->ht_size; ++i)
|
193
|
+
for (p = *(lower(dir)->hash_table + i);
|
194
|
+
p != NULL;
|
195
|
+
p = p->next) {
|
196
|
+
(*action)(p->name, p->obj, priv_ptr);
|
197
|
+
}
|
198
|
+
return;
|
199
|
+
}
|
200
|
+
|
201
|
+
unsigned int Registry_entry_count(dir)
|
202
|
+
Registry dir;
|
203
|
+
{
|
204
|
+
return lower(dir)->record_count;
|
205
|
+
}
|
206
|
+
|
207
|
+
static NORET add_to_darrays(key, value, priv_ptr)
|
208
|
+
VOIDP key;
|
209
|
+
VOIDP value;
|
210
|
+
VOIDP priv_ptr;
|
211
|
+
{
|
212
|
+
struct darray_pair *dapp = (struct darray_pair *) priv_ptr;
|
213
|
+
|
214
|
+
if (dapp->key_darray != NULL)
|
215
|
+
Darray_addh(dapp->key_darray, (VOIDP)key); /* Specs forbid mods to key */
|
216
|
+
if (dapp->value_darray != NULL)
|
217
|
+
Darray_addh(dapp->value_darray, value);
|
218
|
+
}
|
219
|
+
|
220
|
+
NORET Registry_fetch_contents(dir, key_darray, value_darray)
|
221
|
+
Registry dir;
|
222
|
+
Darray key_darray, value_darray;
|
223
|
+
{
|
224
|
+
struct darray_pair dap;
|
225
|
+
|
226
|
+
assert (key_darray == NULL || Darray_len(key_darray) == 0);
|
227
|
+
assert (value_darray == NULL || Darray_len(value_darray) == 0);
|
228
|
+
|
229
|
+
dap.key_darray = key_darray;
|
230
|
+
dap.value_darray = value_darray;
|
231
|
+
|
232
|
+
Registry_traverse(dir, add_to_darrays, (VOIDP)&dap);
|
233
|
+
}
|
234
|
+
|
235
|
+
NORET Registry_destroy(dir)
|
236
|
+
Registry dir;
|
237
|
+
{
|
238
|
+
RegistryRecord *p, *next;
|
239
|
+
int i;
|
240
|
+
|
241
|
+
for (i = 0; i < lower(dir)->ht_size; ++i)
|
242
|
+
for (p = *(lower(dir)->hash_table + i);
|
243
|
+
p != NULL;
|
244
|
+
p = next) {
|
245
|
+
next = p->next;
|
246
|
+
Memory_free((VOIDP)p);
|
247
|
+
}
|
248
|
+
Memory_free((VOIDP)lower(dir)->hash_table);
|
249
|
+
destroy(lower(dir));
|
250
|
+
}
|
251
|
+
|
252
|
+
unsigned int Registry_ptrhash(ptr, htsize)
|
253
|
+
CONSTVOIDP ptr;
|
254
|
+
unsigned int htsize;
|
255
|
+
{
|
256
|
+
unsigned int uns_int_ptr = (unsigned int)ptr;
|
257
|
+
|
258
|
+
if ((int)uns_int_ptr > 0)
|
259
|
+
return htsize ? (int)uns_int_ptr % (int)htsize : 0;
|
260
|
+
else
|
261
|
+
return htsize ? uns_int_ptr % htsize : 0;
|
262
|
+
}
|
263
|
+
|
264
|
+
unsigned int Registry_strhash(strp, htsize)
|
265
|
+
CONSTVOIDP strp;
|
266
|
+
unsigned int htsize;
|
267
|
+
{
|
268
|
+
char *cp = (char *)strp;
|
269
|
+
int hash_temp = 0;
|
270
|
+
|
271
|
+
while (*cp != '\0') {
|
272
|
+
if (hash_temp < 0)
|
273
|
+
hash_temp = (hash_temp << 1) +1;
|
274
|
+
else
|
275
|
+
hash_temp = hash_temp << 1;
|
276
|
+
hash_temp ^= *cp;
|
277
|
+
++cp;
|
278
|
+
}
|
279
|
+
return htsize ? ((unsigned int)hash_temp) % htsize : 0;
|
280
|
+
}
|
281
|
+
|
282
|
+
unsigned int Registry_strcasehash(strp, htsize)
|
283
|
+
CONSTVOIDP strp;
|
284
|
+
unsigned int htsize;
|
285
|
+
{
|
286
|
+
char *cp = (char *)strp;
|
287
|
+
int hash_temp = 0;
|
288
|
+
|
289
|
+
while (*cp != '\0') {
|
290
|
+
if (hash_temp < 0)
|
291
|
+
hash_temp = (hash_temp << 1) +1;
|
292
|
+
else
|
293
|
+
hash_temp = hash_temp << 1;
|
294
|
+
if (isalpha(*cp) && isupper(*cp))
|
295
|
+
hash_temp ^= tolower(*cp);
|
296
|
+
else
|
297
|
+
hash_temp ^= *cp;
|
298
|
+
++cp;
|
299
|
+
}
|
300
|
+
return htsize ? ((unsigned int)hash_temp) % htsize : 0;
|
301
|
+
}
|
302
|
+
|
303
|
+
int Registry_strcmp(str1, str2)
|
304
|
+
CONSTVOIDP str1;
|
305
|
+
CONSTVOIDP str2;
|
306
|
+
{
|
307
|
+
return strcmp((char *)str1, (char *)str2);
|
308
|
+
}
|
309
|
+
|
310
|
+
int Registry_strcasecmp(str1, str2)
|
311
|
+
CONSTVOIDP str1;
|
312
|
+
CONSTVOIDP str2;
|
313
|
+
{
|
314
|
+
return strcasecmp((char *)str1, (char *)str2);
|
315
|
+
}
|
316
|
+
|
317
|
+
int Registry_ptrcmp(ptr1, ptr2)
|
318
|
+
CONSTVOIDP ptr1;
|
319
|
+
CONSTVOIDP ptr2;
|
320
|
+
{
|
321
|
+
if (ptr1==ptr2)
|
322
|
+
return 0;
|
323
|
+
else
|
324
|
+
return 1;
|
325
|
+
}
|
326
|
+
|
@@ -0,0 +1,129 @@
|
|
1
|
+
#ifndef _registry_h_
|
2
|
+
#define _registry_h_
|
3
|
+
|
4
|
+
#include "sysdep.h"
|
5
|
+
#include "bool.h"
|
6
|
+
#include "darray.h"
|
7
|
+
|
8
|
+
typedef struct Registry_st *Registry;
|
9
|
+
|
10
|
+
#ifdef __STDC__
|
11
|
+
typedef unsigned int (*Registry_HashFunc)(CONSTVOIDP, unsigned int);
|
12
|
+
typedef int (*Registry_CompareFunc)(CONSTVOIDP, CONSTVOIDP);
|
13
|
+
typedef NORET (*Registry_ActionProc)(VOIDP, VOIDP, VOIDP);
|
14
|
+
extern Registry Registry_create(Registry_CompareFunc, Registry_HashFunc);
|
15
|
+
extern NORET Registry_size_hint(Registry, unsigned int);
|
16
|
+
extern Bool Registry_add(Registry, VOIDP, VOIDP);
|
17
|
+
extern Bool Registry_remove(Registry, CONSTVOIDP);
|
18
|
+
extern VOIDP Registry_get(Registry, CONSTVOIDP);
|
19
|
+
extern VOIDP Registry_get_original_key(Registry, CONSTVOIDP);
|
20
|
+
extern VOIDP Registry_replace_value(Registry, VOIDP, VOIDP);
|
21
|
+
extern NORET Registry_traverse(Registry, Registry_ActionProc, VOIDP);
|
22
|
+
extern unsigned int Registry_entry_count(Registry);
|
23
|
+
extern NORET Registry_fetch_contents(Registry, Darray, Darray);
|
24
|
+
extern NORET Registry_destroy(Registry);
|
25
|
+
extern int Registry_ptrcmp(CONSTVOIDP, CONSTVOIDP);
|
26
|
+
extern unsigned int Registry_ptrhash(CONSTVOIDP, unsigned int);
|
27
|
+
extern int Registry_strcmp(CONSTVOIDP, CONSTVOIDP);
|
28
|
+
extern unsigned int Registry_strhash(CONSTVOIDP, unsigned int);
|
29
|
+
extern int Registry_strcasecmp(CONSTVOIDP, CONSTVOIDP);
|
30
|
+
extern unsigned int Registry_strcasehash(CONSTVOIDP, unsigned int);
|
31
|
+
#else
|
32
|
+
typedef unsigned int (*Registry_HashFunc)();
|
33
|
+
typedef int (*Registry_CompareFunc)();
|
34
|
+
typedef void (*Registry_ActionProc)();
|
35
|
+
extern Registry Registry_create();
|
36
|
+
extern NORET Registry_size_hint();
|
37
|
+
extern int Registry_add();
|
38
|
+
extern int Registry_remove();
|
39
|
+
extern VOIDP Registry_get();
|
40
|
+
extern VOIDP Registry_get_original_key();
|
41
|
+
extern VOIDP Registry_replace_value();
|
42
|
+
extern NORET Registry_traverse();
|
43
|
+
extern unsigned int Registry_entry_count();
|
44
|
+
extern NORET Registry_fetch_contents();
|
45
|
+
extern NORET Registry_destroy();
|
46
|
+
extern int Registry_ptrcmp();
|
47
|
+
extern unsigned int Registry_ptrhash();
|
48
|
+
extern int Registry_strcmp();
|
49
|
+
extern unsigned int Registry_strhash();
|
50
|
+
extern int Registry_strcasecmp();
|
51
|
+
extern unsigned int Registry_strcasehash();
|
52
|
+
#endif /* __STDC__ */
|
53
|
+
|
54
|
+
|
55
|
+
/*
|
56
|
+
* Registry_create(compare_func, hash_func)
|
57
|
+
* Creates and returns an empty registry. compare_func is used
|
58
|
+
* to compare items in the registry. It should return 0 if its
|
59
|
+
* arguments are to be considered equal. hash_func should return
|
60
|
+
* a number from 0 up to (but not including) its second argument, and should attempt
|
61
|
+
* an even distribution. If compare_func
|
62
|
+
* would return 0 for a pair of objects, hash_func should return
|
63
|
+
* the same value for those objects. For registries of abstract
|
64
|
+
* objects (pointers), Registry_ptrcmp() and Registry_ptrhash() should
|
65
|
+
* be passed as the compare_func and hash_func. Registry_strcmp and
|
66
|
+
* Registry_strhash() may be used for strings. Registry_strcasecmp and
|
67
|
+
* Registry_strcasehash() may be used for strings where case is not
|
68
|
+
* significant (case-insensitive).
|
69
|
+
*
|
70
|
+
* Registry_size_hint(registry, size_hint_value)
|
71
|
+
* The registry may operate more efficiently if this operator is called
|
72
|
+
* and size_hint is close to the maximum number of elements to be in
|
73
|
+
* the Registry, at the possible cost of additional memory use. Likely
|
74
|
+
* to be effective only on an empty registry.
|
75
|
+
*
|
76
|
+
* Registry_add(registry, key, value)
|
77
|
+
* Adds the association between key and value to the registry. Neither
|
78
|
+
* key nor value are copied, and neither may be freed before being removed
|
79
|
+
* from the registry. The key should not be modified in a way that would
|
80
|
+
* change the value of the compare_func or the hash_func until this
|
81
|
+
* association is removed from the registry.
|
82
|
+
* Will return Bool_FALSE if an association with the
|
83
|
+
* same key is already in the registry (in which case the add will not be
|
84
|
+
* performed), Bool_TRUE otherwise (on successful completion).
|
85
|
+
*
|
86
|
+
* Registry_remove(registry, key)
|
87
|
+
* Removes the association with key from the registry. Returns Bool_FALSE
|
88
|
+
* if no such association exists, Bool_TRUE otherwise (on successful
|
89
|
+
* completion)
|
90
|
+
*
|
91
|
+
* Registry_get(registry, key)
|
92
|
+
* Returns the value associated with key in the registry. Returns NULL
|
93
|
+
* if there is no such association.
|
94
|
+
*
|
95
|
+
* Registry_get_original_key(registry, key) (added by Rich Pito 7/91)
|
96
|
+
* Finds a named object in a directory and returns the original key
|
97
|
+
* used to index that object. Returns NULL if the named object is
|
98
|
+
* not in the directory. This is useful for getting the original
|
99
|
+
* string used to make an entry into a registry in order to free it.
|
100
|
+
* In this case, a pointer to the name should be stored, then the
|
101
|
+
* entry should be removed using Registry_remove, then the key may be
|
102
|
+
* freed
|
103
|
+
*
|
104
|
+
* Registry_traverse(registry, action_proc, private_pointer)
|
105
|
+
* Calls action_proc once for each entry in the registry. private_pointer
|
106
|
+
* is a VOIDP which is passed to the action_proc, but not otherwise used.
|
107
|
+
* action_proc should not modify the registry in any way. action_proc takes
|
108
|
+
* three arguments, the key, the value, and private_pointer.
|
109
|
+
*
|
110
|
+
* Registry_entry_count(registry)
|
111
|
+
* Returns the number of associations in the registry.
|
112
|
+
*
|
113
|
+
* Registry_fetch_contents(registry, key_darray, value_darray)
|
114
|
+
* Stores the contents of the registry as follows: In no particular
|
115
|
+
* order, each association is processed in turn by storing (using Darray_addh)
|
116
|
+
* the key into key_darray and the value into value_darray. Either
|
117
|
+
* or both key_darray and/or value_darray may be NULL, in which case
|
118
|
+
* the corresponding data will not be processed. Actual Darrays passed
|
119
|
+
* (not NULL) must be empty. Any objects added to key_darray must be
|
120
|
+
* treated as read-only as long as they remain in the registry.
|
121
|
+
*
|
122
|
+
* Registry_destroy(registry)
|
123
|
+
* Deallocates all resources needed by the registry. Should be the last
|
124
|
+
* operation performed on the registry. Does not deallocate the objects
|
125
|
+
* (keys and values) contained in the registry (this should be done after
|
126
|
+
* the registry is destroyed). Implicitly removes all associations
|
127
|
+
* from the registry.
|
128
|
+
*/
|
129
|
+
#endif /* _registry_h_ */
|