ferret 0.3.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/document.c ADDED
@@ -0,0 +1,336 @@
1
+ #include <document.h>
2
+ #include <string.h>
3
+
4
+ /****************************************************************************
5
+ *
6
+ * DocField
7
+ *
8
+ ****************************************************************************/
9
+
10
+ inline void df_set(DocField *df, const char *name,
11
+ char *data, int store, int index, int tv)
12
+ {
13
+ if ((index == DF_INDEX_NO) && (store == DF_STORE_NO))
14
+ eprintf(ARG_ERROR,
15
+ "it doesn't make sense to have a field that is neither indexed nor stored");
16
+ if ((index == DF_INDEX_NO) && (tv != DF_TERM_VECTOR_NO))
17
+ eprintf(ARG_ERROR,
18
+ "cannot store term vector information for a field that is not indexed");
19
+ df->name = estrdup(name);
20
+ df->data = data;
21
+ df->blen = strlen(data);
22
+ df_set_store(df, store);
23
+ df_set_index(df, index);
24
+ df_set_term_vector(df, tv);
25
+ df->is_binary = false;
26
+ df->boost = 1.0;
27
+ }
28
+
29
+ DocField *df_create(const char *name, char *data, int store, int index, int tv)
30
+ {
31
+ DocField *df = ALLOC(DocField);
32
+ df_set(df, name, data, store, index, tv);
33
+ return df;
34
+ }
35
+
36
+ DocField *df_clone(DocField *self)
37
+ {
38
+ DocField *clone = ALLOC(DocField);
39
+ memcpy(clone, self, sizeof(DocField));
40
+ clone->name = estrdup(self->name);
41
+ clone->data = estrdup(self->data);
42
+ return clone;
43
+ }
44
+
45
+ void df_destroy(void *p)
46
+ {
47
+ DocField *df = (DocField *)p;
48
+ free(df->name);
49
+ free(p);
50
+ }
51
+
52
+ void df_destroy_data(void *p)
53
+ {
54
+ DocField *df = (DocField *)p;
55
+ free(df->data);
56
+ free(df->name);
57
+ free(p);
58
+ }
59
+
60
+ void df_set_store(DocField *df, int store)
61
+ {
62
+ switch (store) {
63
+ case DF_STORE_YES:
64
+ df->is_stored = true;
65
+ df->is_compressed = false;
66
+ break;
67
+ case DF_STORE_NO:
68
+ df->is_stored = false;
69
+ df->is_compressed = false;
70
+ break;
71
+ case DF_STORE_COMPRESS:
72
+ df->is_stored = true;
73
+ df->is_compressed = true;
74
+ break;
75
+ default:
76
+ eprintf(ARG_ERROR, "Invalid value %d for store in document field", store);
77
+ }
78
+ }
79
+
80
+ void df_set_index(DocField *df, int index)
81
+ {
82
+ df->omit_norms = false;
83
+ switch (index) {
84
+ case DF_INDEX_NO:
85
+ df->is_indexed = false;
86
+ df->is_tokenized = false;
87
+ break;
88
+ case DF_INDEX_TOKENIZED:
89
+ df->is_indexed = true;
90
+ df->is_tokenized = true;
91
+ break;
92
+ case DF_INDEX_UNTOKENIZED:
93
+ df->is_indexed = true;
94
+ df->is_tokenized = false;
95
+ break;
96
+ case DF_INDEX_NO_NORMS:
97
+ df->is_indexed = true;
98
+ df->is_tokenized = false;
99
+ df->omit_norms = true;
100
+ break;
101
+ default:
102
+ eprintf(ARG_ERROR, "Invalid value %d for index in document field", index);
103
+ }
104
+ }
105
+
106
+ void df_set_term_vector(DocField *df, int tv)
107
+ {
108
+ switch (tv) {
109
+ case DF_TERM_VECTOR_NO:
110
+ df->store_tv = false;
111
+ df->store_offset = false;
112
+ df->store_pos = false;
113
+ break;
114
+ case DF_TERM_VECTOR_YES:
115
+ df->store_tv = true;
116
+ df->store_offset = false;
117
+ df->store_pos = false;
118
+ break;
119
+ case DF_TERM_VECTOR_WITH_OFFSETS:
120
+ df->store_tv = true;
121
+ df->store_offset = true;
122
+ df->store_pos = false;
123
+ break;
124
+ case DF_TERM_VECTOR_WITH_POSITIONS:
125
+ df->store_tv = true;
126
+ df->store_offset = false;
127
+ df->store_pos = true;
128
+ break;
129
+ case DF_TERM_VECTOR_WITH_POSITIONS_OFFSETS:
130
+ df->store_tv = true;
131
+ df->store_offset = true;
132
+ df->store_pos = true;
133
+ break;
134
+ default:
135
+ eprintf(ARG_ERROR,
136
+ "Invalid value %d for term_vector in document field", tv);
137
+ }
138
+ }
139
+
140
+ DocField *df_create_binary(char *name, char *data, int blen, int store)
141
+ {
142
+ if (store == DF_STORE_NO) {
143
+ eprintf(ARG_ERROR, "It doesn't make sense not to store binary data\n");
144
+ }
145
+ DocField *df = df_create(name, data, store, DF_INDEX_NO, DF_TERM_VECTOR_NO);
146
+ df->is_binary = true;
147
+ df->blen = blen;
148
+ return df;
149
+ }
150
+
151
+ char *df_to_s(DocField *self)
152
+ {
153
+ /* the length of the str is name.len + data.len + 119, add safety 10 */
154
+ char *str = ALLOC_N(char, strlen(self->name) + strlen(self->data) + 129);
155
+ char *str_ptr = str;
156
+
157
+ if (self->is_stored) {
158
+ sprintf(str, "stored/%s,", self->is_compressed ? "compressed" : "uncompressed");
159
+ str_ptr = str + strlen(str);
160
+ }
161
+ sprintf(str_ptr, "%s%s%s%s%s%s%s<%s:%s>",
162
+ self->is_indexed ? "indexed," : "",
163
+ self->is_tokenized ? "tokenized," : "",
164
+ self->store_tv ? "store_term_vector," : "",
165
+ self->store_offset ? "store_offsets," : "",
166
+ self->store_pos ? "store_positions," : "",
167
+ self->omit_norms ? "omit_norms," : "",
168
+ self->is_binary ? "binary," : "",
169
+ self->name,
170
+ self->is_binary ? "=bin_data=" : self->data);
171
+
172
+ return str;
173
+ }
174
+
175
+ /****************************************************************************
176
+ *
177
+ * Document
178
+ *
179
+ ****************************************************************************/
180
+
181
+ Document *doc_create()
182
+ {
183
+ Document *doc = ALLOC(Document);
184
+ doc->fields = h_new_str(&free, &ary_destroy);
185
+ doc->fcnt = 0;
186
+ doc->dfcnt = 0;
187
+ doc->field_arr = NULL;
188
+ doc->df_arr = NULL;
189
+ doc->boost = 1.0;
190
+ doc->free_data = &df_destroy_data;
191
+ return doc;
192
+ }
193
+
194
+ Document *doc_create_keep_data()
195
+ {
196
+ Document *doc = doc_create();
197
+ doc->free_data = df_destroy;
198
+ return doc;
199
+ }
200
+
201
+ void doc_destroy(void *p)
202
+ {
203
+ Document *doc = (Document *)p;
204
+ free(doc->field_arr);
205
+ free(doc->df_arr);
206
+ h_destroy(doc->fields);
207
+ free(doc);
208
+ }
209
+
210
+ void doc_add_field(Document *doc, DocField *df)
211
+ {
212
+ Array *field_ga = (Array *)h_get(doc->fields, df->name);
213
+ if (field_ga == NULL) {
214
+ field_ga = ary_create(1, doc->free_data);
215
+ h_set(doc->fields, estrdup(df->name), field_ga);
216
+ doc->fcnt++;
217
+ REALLOC_N(doc->field_arr, Array *, doc->fcnt);
218
+ doc->field_arr[doc->fcnt-1] = field_ga;
219
+ }
220
+ ary_append(field_ga, df);
221
+ doc->dfcnt++;
222
+ REALLOC_N(doc->df_arr, DocField *, doc->dfcnt);
223
+ doc->df_arr[doc->dfcnt-1] = df;
224
+ }
225
+
226
+ DocField *doc_get_field(Document *doc, const char *fname)
227
+ {
228
+ Array *field_ga = (Array *)h_get(doc->fields, fname);
229
+ if (field_ga) {
230
+ return field_ga->elems[0];
231
+ } else {
232
+ return NULL;
233
+ }
234
+ }
235
+
236
+ Array *doc_get_fields(Document *doc, const char *fname)
237
+ {
238
+ return (Array *)h_get(doc->fields, fname);
239
+ }
240
+
241
+ /**
242
+ * TODO:
243
+ * This is not exactly elegant or efficient but it works and is not going to
244
+ * be a performance problem. Still, it would be nice to make the code a little
245
+ * clearer.
246
+ */
247
+ Array *doc_remove_fields(Document *doc, const char *fname)
248
+ {
249
+ Array *field_ga = (Array *)h_rem(doc->fields, fname, true);
250
+ if (field_ga) {
251
+ int i, j;
252
+ doc->fcnt--;
253
+ for (i = 0; i < doc->fcnt; i++) {
254
+ if (field_ga == doc->field_arr[i]) {
255
+ memmove(&doc->field_arr[i],
256
+ &doc->field_arr[i+1],
257
+ sizeof(void *) * (doc->fcnt - i));
258
+ break;
259
+ }
260
+ }
261
+ for (i = 0, j = 0; i < doc->dfcnt && j < field_ga->size;) {
262
+ if (field_ga->elems[j] == doc->df_arr[i]) {
263
+ memmove(&doc->df_arr[i],
264
+ &doc->df_arr[i+1],
265
+ sizeof(void *) * (doc->dfcnt - i - 1));
266
+ j++;
267
+ doc->dfcnt--;
268
+ } else {
269
+ i++;
270
+ }
271
+ }
272
+ return field_ga;
273
+ } else {
274
+ return NULL;
275
+ }
276
+ }
277
+
278
+ DocField *doc_remove_field(Document *doc, const char *fname)
279
+ {
280
+ DocField *df = NULL;
281
+ Array *dfs = (Array *)h_get(doc->fields, fname);
282
+ if (dfs) {
283
+ df = ary_remove(dfs, 0);
284
+ if (dfs->size == 0) {
285
+ Array *fields = doc_remove_fields(doc, fname);
286
+ ary_destroy(fields);
287
+ } else {
288
+ int i;
289
+ for (i = 0; i < doc->dfcnt; i++) {
290
+ if (df == doc->df_arr[i]) {
291
+ memmove(&doc->df_arr[i],
292
+ &doc->df_arr[i+1],
293
+ sizeof(void *) * (doc->dfcnt - i - 1));
294
+ doc->dfcnt--;
295
+ break;
296
+ }
297
+ }
298
+ }
299
+ }
300
+ return df;
301
+ }
302
+
303
+ bool doc_delete_fields(Document *doc, const char *fname)
304
+ {
305
+ Array *field_ga = doc_remove_fields(doc, fname);
306
+ if (field_ga) {
307
+ ary_destroy(field_ga);
308
+ return true;
309
+ } else {
310
+ return false;
311
+ }
312
+ return h_del(doc->fields, fname);
313
+ }
314
+
315
+ char *doc_to_s(Document *doc)
316
+ {
317
+ int i, len = 20;
318
+ char *str, *str_ptr;
319
+ char **df_strs = ALLOC_N(char *, doc->dfcnt);
320
+ for (i = 0; i < doc->dfcnt; i++) {
321
+ df_strs[i] = df_to_s(doc->df_arr[i]);
322
+ len += strlen(df_strs[i]) + 3;
323
+ }
324
+ str_ptr = str = ALLOC_N(char, len);
325
+ sprintf(str_ptr, "Document {\n");
326
+ str_ptr += strlen(str_ptr);
327
+ for (i = 0; i < doc->dfcnt; i++) {
328
+ sprintf(str_ptr, " %s\n", df_strs[i]);
329
+ free(df_strs[i]);
330
+ str_ptr += strlen(str_ptr);
331
+ }
332
+ sprintf(str_ptr, "}");
333
+ free(df_strs);
334
+
335
+ return str;
336
+ }
data/ext/document.h ADDED
@@ -0,0 +1,87 @@
1
+ #ifndef FRT_DOCUMENT_H
2
+ #define FRT_DOCUMENT_H
3
+
4
+ #include <global.h>
5
+ #include <hash.h>
6
+ #include <array.h>
7
+
8
+ /****************************************************************************
9
+ *
10
+ * DocField
11
+ *
12
+ ****************************************************************************/
13
+ enum { /* storage codes accepted by df_set_store() */
14
+ DF_STORE_YES = 0, /* stored, not compressed */
15
+ DF_STORE_NO = 1, /* not stored */
16
+ DF_STORE_COMPRESS = 2 /* stored and compressed */
17
+ };
18
+
19
+ enum { /* indexing codes accepted by df_set_index() */
20
+ DF_INDEX_UNTOKENIZED = 0, /* indexed, not tokenized */
21
+ DF_INDEX_TOKENIZED = 1, /* indexed and tokenized */
22
+ DF_INDEX_NO = 2, /* not indexed */
23
+ DF_INDEX_NO_NORMS = 3 /* indexed, not tokenized, omit_norms set */
24
+ };
25
+
26
+ enum { /* term-vector codes accepted by df_set_term_vector() */
27
+ DF_TERM_VECTOR_NO = 0, /* no term vector */
28
+ DF_TERM_VECTOR_YES = 1, /* term vector without positions or offsets */
29
+ DF_TERM_VECTOR_WITH_POSITIONS = 2, /* term vector plus positions */
30
+ DF_TERM_VECTOR_WITH_OFFSETS = 3, /* term vector plus offsets */
31
+ DF_TERM_VECTOR_WITH_POSITIONS_OFFSETS = 4 /* term vector plus positions and offsets */
32
+ };
33
+
34
+ typedef struct DocField { /* one named value of a Document plus its store/index/term-vector options */
35
+ char *name; /* private copy made with estrdup(); freed by df_destroy() and df_destroy_data() */
36
+ char *data; /* stored by pointer in df_set(); freed only by df_destroy_data() */
37
+ int blen; // data length in bytes: strlen(data) as set by df_set(), or the explicit length passed to df_create_binary()
38
+ float boost; /* set to 1.0 by df_set() */
39
+ bool is_stored : 1; /* set by df_set_store() */
40
+ bool is_compressed : 1; /* set by df_set_store(); true only for DF_STORE_COMPRESS */
41
+ bool is_indexed : 1; /* set by df_set_index() */
42
+ bool is_tokenized : 1; /* set by df_set_index(); true only for DF_INDEX_TOKENIZED */
43
+ bool store_tv : 1; /* set by df_set_term_vector() */
44
+ bool store_pos : 1; /* set by df_set_term_vector() */
45
+ bool store_offset : 1; /* set by df_set_term_vector() */
46
+ bool omit_norms : 1; /* set by df_set_index(); true only for DF_INDEX_NO_NORMS */
47
+ bool is_binary : 1; /* false after df_set(); true for fields made by df_create_binary() */
48
+ } DocField;
49
+
50
+ DocField *df_create(const char *name, char *data, int store, int index, int tv);
51
+ DocField *df_clone(DocField *self);
52
+ void df_set(DocField *df, const char *name, char *data, int store, int index, int tv);
53
+ void df_destroy(void *p);
54
+ void df_destroy_data(void *p);
55
+ void df_set_store(DocField *df, int store);
56
+ void df_set_index(DocField *df, int index);
57
+ void df_set_term_vector(DocField *df, int tv);
58
+ char *df_to_s(DocField *df);
59
+ DocField *df_create_binary(char *name, char *data, int blen, int store);
60
+
61
+ /****************************************************************************
62
+ *
63
+ * Document
64
+ *
65
+ ****************************************************************************/
66
+
67
+ typedef struct Document { /* a set of DocFields, reachable by name and as a flat list */
68
+ HshTable *fields; /* field name -> Array of the DocFields added under that name */
69
+ Array **field_arr; /* the per-name Arrays, appended as each new name is first added */
70
+ int fcnt; /* number of entries in field_arr */
71
+ DocField **df_arr; /* every DocField, appended in the order it was added */
72
+ int dfcnt; /* number of entries in df_arr */
73
+ float boost; /* set to 1.0 by doc_create() */
74
+ void (*free_data)(void *p); /* element destructor given to each per-name Array: df_destroy_data (doc_create) or df_destroy (doc_create_keep_data) */
75
+ } Document;
76
+
77
+ Document *doc_create();
78
+ Document *doc_create_keep_data();
79
+ void doc_destroy(void *p);
80
+ void doc_add_field(Document *doc, DocField *df);
81
+ DocField *doc_get_field(Document *doc, const char *fname);
82
+ Array *doc_get_fields(Document *doc, const char *fname);
83
+ Array *doc_remove_fields(Document *doc, const char *fname);
84
+ DocField *doc_remove_field(Document *doc, const char *fname);
85
+ bool doc_delete_fields(Document *doc, const char *fname);
86
+ char *doc_to_s(Document *doc);
87
+ #endif
data/ext/ferret.c CHANGED
@@ -1,73 +1,114 @@
1
1
  #include "ferret.h"
2
+ #include "hash.h"
3
+
4
+ /* Object Map */
5
+ static HshTable *object_map;
2
6
 
3
7
  /* IDs */
4
8
  ID id_new;
5
- ID id_close;
6
- ID id_size;
7
- ID id_iv_size;
8
9
 
9
10
  /* Modules */
10
11
  VALUE mFerret;
11
- VALUE mStore;
12
- VALUE mIndex;
13
- VALUE mUtils;
14
12
  VALUE mAnalysis;
13
+ VALUE mDocument;
14
+ VALUE mIndex;
15
15
  VALUE mSearch;
16
+ VALUE mStore;
16
17
  VALUE mStringHelper;
18
+ VALUE mUtils;
19
+ VALUE mSpans;
17
20
 
18
21
  /* Classes */
19
- VALUE cRAMDirectory;
20
- VALUE cIndexIn;
21
- VALUE cBufferedIndexIn;
22
- VALUE cFSIndexIn;
23
- VALUE cIndexOut;
24
- VALUE cBufferedIndexOut;
25
- VALUE cFSIndexOut;
26
- VALUE cRAMIndexOut;
27
- VALUE cRAMIndexIn;
28
- VALUE cTerm;
29
- VALUE cTermBuffer;
30
- VALUE cTermInfo;
31
- VALUE cToken;
32
- VALUE cPriorityQueue;
33
- VALUE cSegmentMergeQueue;
34
- VALUE cSegmentTermEnum;
35
- VALUE cTermEnum;
36
- VALUE cTermInfosReader;
37
- VALUE cSimilarity;
38
- VALUE cDefaultSimilarity;
22
+ /*
23
+ */
24
+
25
+
26
/*
 * Hash function for the object map: the key's address is the hash.
 *
 * The pointer is first widened to unsigned long and only then narrowed to
 * unsigned int. Casting a pointer straight to a narrower integer is a
 * size-mismatched conversion on 64-bit targets; going through unsigned long
 * makes the truncation to the low bits explicit.
 */
unsigned int
object_hash(const void *key)
{
    return (unsigned int)(unsigned long)key;
}
31
+
32
/*
 * Equality function for the object map: two keys are equal exactly when
 * they are the same address. Returns 1 for equal, 0 otherwise.
 */
int
object_eq(const void *key1, const void *key2)
{
    return (key1 != key2) ? 0 : 1;
}
37
+
38
+ VALUE
39
+ object_get(void *key)
40
+ {
41
+ VALUE val = (VALUE)h_get(object_map, key);
42
+ if (!val) val = Qnil;
43
+ return val;
44
+ }
45
+
46
+ //static int hash_cnt = 0;
47
+ void
48
+ //object_add(void *key, VALUE obj)
49
+ object_add2(void *key, VALUE obj, const char *file, int line, const char *func)
50
+ {
51
+ if (h_get(object_map, key))
52
+ printf("failed adding %d. %s:%d:%s\n", (int)key, file, line, func);
53
+ //printf("adding %d. now contains %d %s:%d:%s\n", (int)key, ++hash_cnt, file, line, func);
54
+ h_set(object_map, key, (void *)obj);
55
+ }
56
+
57
+ void
58
+ //object_del(void *key)
59
+ object_del2(void *key, const char *file, int line, const char *func)
60
+ {
61
+ if (object_get(key) == Qnil)
62
+ printf("failed deleting %d. %s:%d:%s\n", (int)key, file, line, func);
63
+ //printf("deleting %d. now contains %d, %s:%d:%s\n", (int)key, --hash_cnt, file, line, func);
64
+ h_del(object_map, key);
65
+ }
66
+
67
+ void
68
+ frt_gc_mark(void *key)
69
+ {
70
+ VALUE val = (VALUE)h_get(object_map, key);
71
+ if (val)
72
+ rb_gc_mark(val);
73
+ }
74
+
75
+ VALUE
76
+ frt_data_alloc(VALUE klass)
77
+ {
78
+ return Frt_Make_Struct(klass);
79
+ }
80
+
81
/*
 * Free callback that only unregisters p through object_del(); the memory
 * behind p is not released here.
 * NOTE(review): object_del is defined elsewhere, presumably a wrapper that
 * forwards to object_del2() with the call site - confirm in ferret.h.
 */
void
frt_deref_free(void *p)
{
    void *key = p;

    object_del(key);
}
39
86
 
40
87
  void
41
88
  Init_ferret_ext(void)
42
89
  {
90
+ /* initialize object map */
91
+ object_map = h_new(&object_hash, &object_eq, NULL, NULL);
92
+
43
93
  /* IDs */
44
- id_new = rb_intern("new");
45
- id_close = rb_intern("close");
46
- id_size = rb_intern("size");
47
- id_iv_size = rb_intern("@size");
94
+ id_new = rb_intern("new");
48
95
 
49
96
  /* Modules */
50
97
  mFerret = rb_define_module("Ferret");
51
- mStore = rb_define_module_under(mFerret, "Store");
52
- mIndex = rb_define_module_under(mFerret, "Index");
53
- mUtils = rb_define_module_under(mFerret, "Utils");
54
98
  mAnalysis = rb_define_module_under(mFerret, "Analysis");
99
+ mDocument = rb_define_module_under(mFerret, "Document");
100
+ mIndex = rb_define_module_under(mFerret, "Index");
55
101
  mSearch = rb_define_module_under(mFerret, "Search");
56
-
57
- /* Classes */
58
- cTermEnum = rb_define_class_under(mIndex, "TermEnum", rb_cObject);
102
+ mStore = rb_define_module_under(mFerret, "Store");
103
+ mUtils = rb_define_module_under(mFerret, "Utils");
104
+ mSpans = rb_define_module_under(mSearch, "Spans");
59
105
 
60
106
  /* Inits */
61
- Init_indexio();
62
107
  Init_term();
63
- Init_term_buffer();
64
- Init_term_info();
65
- Init_term_infos_reader();
66
- Init_token();
67
- Init_priority_queue();
68
- Init_segment_merge_queue();
69
- Init_segment_term_enum();
70
- Init_ram_directory();
71
- Init_string_helper();
72
- Init_similarity();
108
+ Init_analysis();
109
+ Init_doc();
110
+ Init_dir();
111
+ Init_index_io();
112
+ Init_search();
113
+ Init_qparser();
73
114
  }