ferret 0.3.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/document.c ADDED
@@ -0,0 +1,336 @@
1
+ #include <document.h>
2
+ #include <string.h>
3
+
4
+ /****************************************************************************
5
+ *
6
+ * DocField
7
+ *
8
+ ****************************************************************************/
9
+
10
+ inline void df_set(DocField *df, const char *name,
11
+ char *data, int store, int index, int tv)
12
+ {
13
+ if ((index == DF_INDEX_NO) && (store == DF_STORE_NO))
14
+ eprintf(ARG_ERROR,
15
+ "it doesn't make sense to have a field that is neither indexed nor stored");
16
+ if ((index == DF_INDEX_NO) && (tv != DF_TERM_VECTOR_NO))
17
+ eprintf(ARG_ERROR,
18
+ "cannot store term vector information for a field that is not indexed");
19
+ df->name = estrdup(name);
20
+ df->data = data;
21
+ df->blen = strlen(data);
22
+ df_set_store(df, store);
23
+ df_set_index(df, index);
24
+ df_set_term_vector(df, tv);
25
+ df->is_binary = false;
26
+ df->boost = 1.0;
27
+ }
28
+
29
+ DocField *df_create(const char *name, char *data, int store, int index, int tv)
30
+ {
31
+ DocField *df = ALLOC(DocField);
32
+ df_set(df, name, data, store, index, tv);
33
+ return df;
34
+ }
35
+
36
+ DocField *df_clone(DocField *self)
37
+ {
38
+ DocField *clone = ALLOC(DocField);
39
+ memcpy(clone, self, sizeof(DocField));
40
+ clone->name = estrdup(self->name);
41
+ clone->data = estrdup(self->data);
42
+ return clone;
43
+ }
44
+
45
+ void df_destroy(void *p)
46
+ {
47
+ DocField *df = (DocField *)p;
48
+ free(df->name);
49
+ free(p);
50
+ }
51
+
52
+ void df_destroy_data(void *p)
53
+ {
54
+ DocField *df = (DocField *)p;
55
+ free(df->data);
56
+ free(df->name);
57
+ free(p);
58
+ }
59
+
60
+ void df_set_store(DocField *df, int store)
61
+ {
62
+ switch (store) {
63
+ case DF_STORE_YES:
64
+ df->is_stored = true;
65
+ df->is_compressed = false;
66
+ break;
67
+ case DF_STORE_NO:
68
+ df->is_stored = false;
69
+ df->is_compressed = false;
70
+ break;
71
+ case DF_STORE_COMPRESS:
72
+ df->is_stored = true;
73
+ df->is_compressed = true;
74
+ break;
75
+ default:
76
+ eprintf(ARG_ERROR, "Invalid value %d for store in document field", store);
77
+ }
78
+ }
79
+
80
+ void df_set_index(DocField *df, int index)
81
+ {
82
+ df->omit_norms = false;
83
+ switch (index) {
84
+ case DF_INDEX_NO:
85
+ df->is_indexed = false;
86
+ df->is_tokenized = false;
87
+ break;
88
+ case DF_INDEX_TOKENIZED:
89
+ df->is_indexed = true;
90
+ df->is_tokenized = true;
91
+ break;
92
+ case DF_INDEX_UNTOKENIZED:
93
+ df->is_indexed = true;
94
+ df->is_tokenized = false;
95
+ break;
96
+ case DF_INDEX_NO_NORMS:
97
+ df->is_indexed = true;
98
+ df->is_tokenized = false;
99
+ df->omit_norms = true;
100
+ break;
101
+ default:
102
+ eprintf(ARG_ERROR, "Invalid value %d for index in document field", index);
103
+ }
104
+ }
105
+
106
+ void df_set_term_vector(DocField *df, int tv)
107
+ {
108
+ switch (tv) {
109
+ case DF_TERM_VECTOR_NO:
110
+ df->store_tv = false;
111
+ df->store_offset = false;
112
+ df->store_pos = false;
113
+ break;
114
+ case DF_TERM_VECTOR_YES:
115
+ df->store_tv = true;
116
+ df->store_offset = false;
117
+ df->store_pos = false;
118
+ break;
119
+ case DF_TERM_VECTOR_WITH_OFFSETS:
120
+ df->store_tv = true;
121
+ df->store_offset = true;
122
+ df->store_pos = false;
123
+ break;
124
+ case DF_TERM_VECTOR_WITH_POSITIONS:
125
+ df->store_tv = true;
126
+ df->store_offset = false;
127
+ df->store_pos = true;
128
+ break;
129
+ case DF_TERM_VECTOR_WITH_POSITIONS_OFFSETS:
130
+ df->store_tv = true;
131
+ df->store_offset = true;
132
+ df->store_pos = true;
133
+ break;
134
+ default:
135
+ eprintf(ARG_ERROR,
136
+ "Invalid value %d for term_vector in document field", tv);
137
+ }
138
+ }
139
+
140
+ DocField *df_create_binary(char *name, char *data, int blen, int store)
141
+ {
142
+ if (store == DF_STORE_NO) {
143
+ eprintf(ARG_ERROR, "It doesn't make sense not to store binary data\n");
144
+ }
145
+ DocField *df = df_create(name, data, store, DF_INDEX_NO, DF_TERM_VECTOR_NO);
146
+ df->is_binary = true;
147
+ df->blen = blen;
148
+ return df;
149
+ }
150
+
151
+ char *df_to_s(DocField *self)
152
+ {
153
+ /* the length of the str is name.len + data.len + 119, add safety 10 */
154
+ char *str = ALLOC_N(char, strlen(self->name) + strlen(self->data) + 129);
155
+ char *str_ptr = str;
156
+
157
+ if (self->is_stored) {
158
+ sprintf(str, "stored/%s,", self->is_compressed ? "compressed" : "uncompressed");
159
+ str_ptr = str + strlen(str);
160
+ }
161
+ sprintf(str_ptr, "%s%s%s%s%s%s%s<%s:%s>",
162
+ self->is_indexed ? "indexed," : "",
163
+ self->is_tokenized ? "tokenized," : "",
164
+ self->store_tv ? "store_term_vector," : "",
165
+ self->store_offset ? "store_offsets," : "",
166
+ self->store_pos ? "store_positions," : "",
167
+ self->omit_norms ? "omit_norms," : "",
168
+ self->is_binary ? "binary," : "",
169
+ self->name,
170
+ self->is_binary ? "=bin_data=" : self->data);
171
+
172
+ return str;
173
+ }
174
+
175
+ /****************************************************************************
176
+ *
177
+ * Document
178
+ *
179
+ ****************************************************************************/
180
+
181
+ Document *doc_create()
182
+ {
183
+ Document *doc = ALLOC(Document);
184
+ doc->fields = h_new_str(&free, &ary_destroy);
185
+ doc->fcnt = 0;
186
+ doc->dfcnt = 0;
187
+ doc->field_arr = NULL;
188
+ doc->df_arr = NULL;
189
+ doc->boost = 1.0;
190
+ doc->free_data = &df_destroy_data;
191
+ return doc;
192
+ }
193
+
194
+ Document *doc_create_keep_data()
195
+ {
196
+ Document *doc = doc_create();
197
+ doc->free_data = df_destroy;
198
+ return doc;
199
+ }
200
+
201
+ void doc_destroy(void *p)
202
+ {
203
+ Document *doc = (Document *)p;
204
+ free(doc->field_arr);
205
+ free(doc->df_arr);
206
+ h_destroy(doc->fields);
207
+ free(doc);
208
+ }
209
+
210
+ void doc_add_field(Document *doc, DocField *df)
211
+ {
212
+ Array *field_ga = (Array *)h_get(doc->fields, df->name);
213
+ if (field_ga == NULL) {
214
+ field_ga = ary_create(1, doc->free_data);
215
+ h_set(doc->fields, estrdup(df->name), field_ga);
216
+ doc->fcnt++;
217
+ REALLOC_N(doc->field_arr, Array *, doc->fcnt);
218
+ doc->field_arr[doc->fcnt-1] = field_ga;
219
+ }
220
+ ary_append(field_ga, df);
221
+ doc->dfcnt++;
222
+ REALLOC_N(doc->df_arr, DocField *, doc->dfcnt);
223
+ doc->df_arr[doc->dfcnt-1] = df;
224
+ }
225
+
226
+ DocField *doc_get_field(Document *doc, const char *fname)
227
+ {
228
+ Array *field_ga = (Array *)h_get(doc->fields, fname);
229
+ if (field_ga) {
230
+ return field_ga->elems[0];
231
+ } else {
232
+ return NULL;
233
+ }
234
+ }
235
+
236
+ Array *doc_get_fields(Document *doc, const char *fname)
237
+ {
238
+ return (Array *)h_get(doc->fields, fname);
239
+ }
240
+
241
+ /**
242
+ * TODO:
243
+ * This is not exactly elegant or efficient but it works and is not going to
244
+ * be a performance problem. Still, it would be nice to make the code a little
245
+ * clearer.
246
+ */
247
+ Array *doc_remove_fields(Document *doc, const char *fname)
248
+ {
249
+ Array *field_ga = (Array *)h_rem(doc->fields, fname, true);
250
+ if (field_ga) {
251
+ int i, j;
252
+ doc->fcnt--;
253
+ for (i = 0; i < doc->fcnt; i++) {
254
+ if (field_ga == doc->field_arr[i]) {
255
+ memmove(&doc->field_arr[i],
256
+ &doc->field_arr[i+1],
257
+ sizeof(void *) * (doc->fcnt - i));
258
+ break;
259
+ }
260
+ }
261
+ for (i = 0, j = 0; i < doc->dfcnt && j < field_ga->size;) {
262
+ if (field_ga->elems[j] == doc->df_arr[i]) {
263
+ memmove(&doc->df_arr[i],
264
+ &doc->df_arr[i+1],
265
+ sizeof(void *) * (doc->dfcnt - i - 1));
266
+ j++;
267
+ doc->dfcnt--;
268
+ } else {
269
+ i++;
270
+ }
271
+ }
272
+ return field_ga;
273
+ } else {
274
+ return NULL;
275
+ }
276
+ }
277
+
278
+ DocField *doc_remove_field(Document *doc, const char *fname)
279
+ {
280
+ DocField *df = NULL;
281
+ Array *dfs = (Array *)h_get(doc->fields, fname);
282
+ if (dfs) {
283
+ df = ary_remove(dfs, 0);
284
+ if (dfs->size == 0) {
285
+ Array *fields = doc_remove_fields(doc, fname);
286
+ ary_destroy(fields);
287
+ } else {
288
+ int i;
289
+ for (i = 0; i < doc->dfcnt; i++) {
290
+ if (df == doc->df_arr[i]) {
291
+ memmove(&doc->df_arr[i],
292
+ &doc->df_arr[i+1],
293
+ sizeof(void *) * (doc->dfcnt - i - 1));
294
+ doc->dfcnt--;
295
+ break;
296
+ }
297
+ }
298
+ }
299
+ }
300
+ return df;
301
+ }
302
+
303
+ bool doc_delete_fields(Document *doc, const char *fname)
304
+ {
305
+ Array *field_ga = doc_remove_fields(doc, fname);
306
+ if (field_ga) {
307
+ ary_destroy(field_ga);
308
+ return true;
309
+ } else {
310
+ return false;
311
+ }
312
+ return h_del(doc->fields, fname);
313
+ }
314
+
315
+ char *doc_to_s(Document *doc)
316
+ {
317
+ int i, len = 20;
318
+ char *str, *str_ptr;
319
+ char **df_strs = ALLOC_N(char *, doc->dfcnt);
320
+ for (i = 0; i < doc->dfcnt; i++) {
321
+ df_strs[i] = df_to_s(doc->df_arr[i]);
322
+ len += strlen(df_strs[i]) + 3;
323
+ }
324
+ str_ptr = str = ALLOC_N(char, len);
325
+ sprintf(str_ptr, "Document {\n");
326
+ str_ptr += strlen(str_ptr);
327
+ for (i = 0; i < doc->dfcnt; i++) {
328
+ sprintf(str_ptr, " %s\n", df_strs[i]);
329
+ free(df_strs[i]);
330
+ str_ptr += strlen(str_ptr);
331
+ }
332
+ sprintf(str_ptr, "}");
333
+ free(df_strs);
334
+
335
+ return str;
336
+ }
data/ext/document.h ADDED
@@ -0,0 +1,87 @@
1
+ #ifndef FRT_DOCUMENT_H
2
+ #define FRT_DOCUMENT_H
3
+
4
+ #include <global.h>
5
+ #include <hash.h>
6
+ #include <array.h>
7
+
8
+ /****************************************************************************
9
+ *
10
+ * DocField
11
+ *
12
+ ****************************************************************************/
13
+ enum {
14
+ DF_STORE_YES = 0,
15
+ DF_STORE_NO = 1,
16
+ DF_STORE_COMPRESS = 2
17
+ };
18
+
19
+ enum {
20
+ DF_INDEX_UNTOKENIZED = 0,
21
+ DF_INDEX_TOKENIZED = 1,
22
+ DF_INDEX_NO = 2,
23
+ DF_INDEX_NO_NORMS = 3
24
+ };
25
+
26
+ enum {
27
+ DF_TERM_VECTOR_NO = 0,
28
+ DF_TERM_VECTOR_YES = 1,
29
+ DF_TERM_VECTOR_WITH_POSITIONS = 2,
30
+ DF_TERM_VECTOR_WITH_OFFSETS = 3,
31
+ DF_TERM_VECTOR_WITH_POSITIONS_OFFSETS = 4
32
+ };
33
+
34
+ typedef struct DocField {
35
+ char *name;
36
+ char *data;
37
+ int blen; // This is used for binary fields only to store the data length
38
+ float boost;
39
+ bool is_stored : 1;
40
+ bool is_compressed : 1;
41
+ bool is_indexed : 1;
42
+ bool is_tokenized : 1;
43
+ bool store_tv : 1;
44
+ bool store_pos : 1;
45
+ bool store_offset : 1;
46
+ bool omit_norms : 1;
47
+ bool is_binary : 1;
48
+ } DocField;
49
+
50
+ DocField *df_create(const char *name, char *data, int store, int index, int tv);
51
+ DocField *df_clone(DocField *self);
52
+ void df_set(DocField *df, const char *name, char *data, int store, int index, int tv);
53
+ void df_destroy(void *p);
54
+ void df_destroy_data(void *p);
55
+ void df_set_store(DocField *df, int store);
56
+ void df_set_index(DocField *df, int index);
57
+ void df_set_term_vector(DocField *df, int tv);
58
+ char *df_to_s(DocField *df);
59
+ DocField *df_create_binary(char *name, char *data, int blen, int store);
60
+
61
+ /****************************************************************************
62
+ *
63
+ * Document
64
+ *
65
+ ****************************************************************************/
66
+
67
+ typedef struct Document {
68
+ HshTable *fields;
69
+ Array **field_arr;
70
+ int fcnt;
71
+ DocField **df_arr;
72
+ int dfcnt;
73
+ float boost;
74
+ void (*free_data)(void *p);
75
+ } Document;
76
+
77
+ Document *doc_create();
78
+ Document *doc_create_keep_data();
79
+ void doc_destroy(void *p);
80
+ void doc_add_field(Document *doc, DocField *df);
81
+ DocField *doc_get_field(Document *doc, const char *fname);
82
+ Array *doc_get_fields(Document *doc, const char *fname);
83
+ Array *doc_remove_fields(Document *doc, const char *fname);
84
+ DocField *doc_remove_field(Document *doc, const char *fname);
85
+ bool doc_delete_fields(Document *doc, const char *fname);
86
+ char *doc_to_s(Document *doc);
87
+ #endif
data/ext/ferret.c CHANGED
@@ -1,73 +1,114 @@
1
1
  #include "ferret.h"
2
+ #include "hash.h"
3
+
4
+ /* Object Map */
5
+ static HshTable *object_map;
2
6
 
3
7
  /* IDs */
4
8
  ID id_new;
5
- ID id_close;
6
- ID id_size;
7
- ID id_iv_size;
8
9
 
9
10
  /* Modules */
10
11
  VALUE mFerret;
11
- VALUE mStore;
12
- VALUE mIndex;
13
- VALUE mUtils;
14
12
  VALUE mAnalysis;
13
+ VALUE mDocument;
14
+ VALUE mIndex;
15
15
  VALUE mSearch;
16
+ VALUE mStore;
16
17
  VALUE mStringHelper;
18
+ VALUE mUtils;
19
+ VALUE mSpans;
17
20
 
18
21
  /* Classes */
19
- VALUE cRAMDirectory;
20
- VALUE cIndexIn;
21
- VALUE cBufferedIndexIn;
22
- VALUE cFSIndexIn;
23
- VALUE cIndexOut;
24
- VALUE cBufferedIndexOut;
25
- VALUE cFSIndexOut;
26
- VALUE cRAMIndexOut;
27
- VALUE cRAMIndexIn;
28
- VALUE cTerm;
29
- VALUE cTermBuffer;
30
- VALUE cTermInfo;
31
- VALUE cToken;
32
- VALUE cPriorityQueue;
33
- VALUE cSegmentMergeQueue;
34
- VALUE cSegmentTermEnum;
35
- VALUE cTermEnum;
36
- VALUE cTermInfosReader;
37
- VALUE cSimilarity;
38
- VALUE cDefaultSimilarity;
22
+ /*
23
+ */
24
+
25
+
26
/**
 * Hash function for the object map: the key pointer's address, reduced to
 * an unsigned int.
 *
 * The pointer is converted through size_t first, so the narrowing on
 * platforms with 64-bit pointers is an explicit integer conversion rather
 * than a direct pointer-to-smaller-integer cast.
 */
unsigned int
object_hash(const void *key)
{
  return (unsigned int)(size_t)key;
}
31
+
32
/**
 * Equality function for the object map: two keys are equal only when they
 * are the same pointer.
 *
 * @return 1 when the pointers are identical, 0 otherwise
 */
int
object_eq(const void *key1, const void *key2)
{
  if (key1 == key2) {
    return 1;
  }
  return 0;
}
37
+
38
+ VALUE
39
+ object_get(void *key)
40
+ {
41
+ VALUE val = (VALUE)h_get(object_map, key);
42
+ if (!val) val = Qnil;
43
+ return val;
44
+ }
45
+
46
+ //static int hash_cnt = 0;
47
+ void
48
+ //object_add(void *key, VALUE obj)
49
+ object_add2(void *key, VALUE obj, const char *file, int line, const char *func)
50
+ {
51
+ if (h_get(object_map, key))
52
+ printf("failed adding %d. %s:%d:%s\n", (int)key, file, line, func);
53
+ //printf("adding %d. now contains %d %s:%d:%s\n", (int)key, ++hash_cnt, file, line, func);
54
+ h_set(object_map, key, (void *)obj);
55
+ }
56
+
57
+ void
58
+ //object_del(void *key)
59
+ object_del2(void *key, const char *file, int line, const char *func)
60
+ {
61
+ if (object_get(key) == Qnil)
62
+ printf("failed deleting %d. %s:%d:%s\n", (int)key, file, line, func);
63
+ //printf("deleting %d. now contains %d, %s:%d:%s\n", (int)key, --hash_cnt, file, line, func);
64
+ h_del(object_map, key);
65
+ }
66
+
67
+ void
68
+ frt_gc_mark(void *key)
69
+ {
70
+ VALUE val = (VALUE)h_get(object_map, key);
71
+ if (val)
72
+ rb_gc_mark(val);
73
+ }
74
+
75
+ VALUE
76
+ frt_data_alloc(VALUE klass)
77
+ {
78
+ return Frt_Make_Struct(klass);
79
+ }
80
+
81
/**
 * Free callback that only unregisters +p+ from the object map; the memory
 * behind +p+ is not released here.
 *
 * @param p C pointer used as the map key
 */
void
frt_deref_free(void *p)
{
  void *key = p;

  object_del(key);
}
39
86
 
40
87
  void
41
88
  Init_ferret_ext(void)
42
89
  {
90
+ /* initialize object map */
91
+ object_map = h_new(&object_hash, &object_eq, NULL, NULL);
92
+
43
93
  /* IDs */
44
- id_new = rb_intern("new");
45
- id_close = rb_intern("close");
46
- id_size = rb_intern("size");
47
- id_iv_size = rb_intern("@size");
94
+ id_new = rb_intern("new");
48
95
 
49
96
  /* Modules */
50
97
  mFerret = rb_define_module("Ferret");
51
- mStore = rb_define_module_under(mFerret, "Store");
52
- mIndex = rb_define_module_under(mFerret, "Index");
53
- mUtils = rb_define_module_under(mFerret, "Utils");
54
98
  mAnalysis = rb_define_module_under(mFerret, "Analysis");
99
+ mDocument = rb_define_module_under(mFerret, "Document");
100
+ mIndex = rb_define_module_under(mFerret, "Index");
55
101
  mSearch = rb_define_module_under(mFerret, "Search");
56
-
57
- /* Classes */
58
- cTermEnum = rb_define_class_under(mIndex, "TermEnum", rb_cObject);
102
+ mStore = rb_define_module_under(mFerret, "Store");
103
+ mUtils = rb_define_module_under(mFerret, "Utils");
104
+ mSpans = rb_define_module_under(mSearch, "Spans");
59
105
 
60
106
  /* Inits */
61
- Init_indexio();
62
107
  Init_term();
63
- Init_term_buffer();
64
- Init_term_info();
65
- Init_term_infos_reader();
66
- Init_token();
67
- Init_priority_queue();
68
- Init_segment_merge_queue();
69
- Init_segment_term_enum();
70
- Init_ram_directory();
71
- Init_string_helper();
72
- Init_similarity();
108
+ Init_analysis();
109
+ Init_doc();
110
+ Init_dir();
111
+ Init_index_io();
112
+ Init_search();
113
+ Init_qparser();
73
114
  }