jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/ext/ferret.h ADDED
@@ -0,0 +1,94 @@
1
+ #ifndef __FERRET_H_
2
+ #define __FERRET_H_
3
+
4
+ #include "global.h"
5
+ #include "hashset.h"
6
+ #include "document.h"
7
+ #include "symbol.h"
8
+ #include "internal.h"
9
+
10
+ /* IDs */
11
+ extern ID id_new;
12
+ extern ID id_call;
13
+ extern ID id_hash;
14
+ extern ID id_eql;
15
+ extern ID id_capacity;
16
+ extern ID id_less_than;
17
+ extern ID id_lt;
18
+ extern ID id_is_directory;
19
+ extern ID id_close;
20
+ extern ID id_cclass;
21
+ extern ID id_data;
22
+
23
+ /* Symbols */
24
+ extern VALUE sym_yes;
25
+ extern VALUE sym_no;
26
+ extern VALUE sym_true;
27
+ extern VALUE sym_false;
28
+ extern VALUE sym_path;
29
+ extern VALUE sym_dir;
30
+
31
+ /* Modules */
32
+ extern VALUE mFerret;
33
+ extern VALUE mIndex;
34
+ extern VALUE mSearch;
35
+ extern VALUE mStore;
36
+ extern VALUE mStringHelper;
37
+ extern VALUE mSpans;
38
+
39
+ /* Classes */
40
+ extern VALUE cDirectory;
41
+ extern VALUE cLockError;
42
+ extern VALUE cTerm;
43
+
44
+ /* Ferret Inits */
45
+ extern void Init_Utils();
46
+ extern void Init_Analysis();
47
+ extern void Init_Store();
48
+ extern void Init_Index();
49
+ extern void Init_Search();
50
+ extern void Init_QueryParser();
51
+
52
+ //extern void object_add(void *key, VALUE obj);
53
+ #define object_add(key, obj) object_add2(key, obj, __FILE__, __LINE__)
54
+ extern void object_add2(void *key, VALUE obj, const char *file, int line);
55
+ //extern void object_set(void *key, VALUE obj);
56
+ #define object_set(key, obj) object_set2(key, obj, __FILE__, __LINE__)
57
+ extern void object_set2(void *key, VALUE obj, const char *file, int line);
58
+ //extern void object_del(void *key);
59
+ #define object_del(key) object_del2(key, __FILE__, __LINE__)
60
+ extern void object_del2(void *key, const char *file, int line);
61
+ extern void frb_gc_mark(void *key);
62
+ extern VALUE object_get(void *key);
63
+ extern VALUE frb_data_alloc(VALUE klass);
64
+ extern void frb_deref_free(void *p);
65
+ extern void frb_create_dir(VALUE rpath);
66
+ extern VALUE frb_hs_to_rb_ary(HashSet *hs);
67
+ extern void *frb_rb_data_ptr(VALUE val);
68
+ extern Symbol frb_field(VALUE rfield);
69
+ extern VALUE frb_get_term(Symbol field, const char *term);
70
+ extern char *json_concat_string(char *s, char *field);
71
+ extern char *rs2s(VALUE rstr);
72
+ extern char *rstrdup(VALUE rstr);
73
+ extern Symbol rintern(VALUE rstr);
74
+ #define Frt_Make_Struct(klass)\
75
+ rb_data_object_alloc(klass,NULL,(RUBY_DATA_FUNC)NULL,(RUBY_DATA_FUNC)NULL)
76
+
77
+ #define Frt_Wrap_Struct(self,mmark,mfree,mdata)\
78
+ do {\
79
+ ((struct RData *)(self))->data = mdata;\
80
+ ((struct RData *)(self))->dmark = (RUBY_DATA_FUNC)mmark;\
81
+ ((struct RData *)(self))->dfree = (RUBY_DATA_FUNC)mfree;\
82
+ } while (0)
83
+
84
+ #define Frt_Unwrap_Struct(self)\
85
+ do {\
86
+ ((struct RData *)(self))->data = NULL;\
87
+ ((struct RData *)(self))->dmark = NULL;\
88
+ ((struct RData *)(self))->dfree = NULL;\
89
+ } while (0)
90
+
91
+ #endif
92
+
93
+ #define frb_mark_cclass(klass) rb_ivar_set(klass, id_cclass, Qtrue)
94
+ #define frb_is_cclass(obj) (rb_ivar_get(CLASS_OF(obj), id_cclass) == Qtrue)
data/ext/field_index.c ADDED
@@ -0,0 +1,262 @@
1
+ #include <string.h>
2
+ #include "field_index.h"
3
+ #include "internal.h"
4
+
5
+ /***************************************************************************
6
+ *
7
+ * FieldIndex
8
+ *
9
+ ***************************************************************************/
10
+
11
+ static unsigned long field_index_hash(const void *p)
12
+ {
13
+ FieldIndex *self = (FieldIndex *)p;
14
+ return sym_hash(self->field) ^ (unsigned long)(self->klass);
15
+ }
16
+
17
+ static int field_index_eq(const void *p1, const void *p2)
18
+ {
19
+ FieldIndex *fi1 = (FieldIndex *)p1;
20
+ FieldIndex *fi2 = (FieldIndex *)p2;
21
+ return (fi1->field == fi2->field) &&
22
+ (fi1->klass->type == fi2->klass->type);
23
+ }
24
+
25
+ static void field_index_destroy(void *p)
26
+ {
27
+ FieldIndex *self = (FieldIndex *)p;
28
+ if (self->index) {
29
+ self->klass->destroy_index(self->index);
30
+ }
31
+ free(self);
32
+ }
33
+
34
+ FieldIndex *field_index_get(IndexReader *ir, Symbol field,
35
+ const FieldIndexClass *klass)
36
+ {
37
+ int length = 0;
38
+ TermEnum *volatile te = NULL;
39
+ TermDocEnum *volatile tde = NULL;
40
+ FieldInfo *fi = fis_get_field(ir->fis, field);
41
+ const volatile int field_num = fi ? fi->number : -1;
42
+ FieldIndex *volatile self = NULL;
43
+ FieldIndex key;
44
+
45
+ if (field_num < 0) {
46
+ RAISE(ARG_ERROR,
47
+ "Cannot sort by field \"%s\". It doesn't exist in the index.",
48
+ S(field));
49
+ }
50
+
51
+ if (!ir->field_index_cache) {
52
+ ir->field_index_cache = h_new(&field_index_hash, &field_index_eq,
53
+ NULL, &field_index_destroy);
54
+ }
55
+
56
+ key.field = field;
57
+ key.klass = klass;
58
+ self = (FieldIndex *)h_get(ir->field_index_cache, &key);
59
+
60
+ if (self == NULL) {
61
+ self = ALLOC(FieldIndex);
62
+ self->klass = klass;
63
+ /* FieldIndex only lives as long as the IndexReader lives so we can
64
+ * just use the field_infos field string */
65
+ self->field = fi->name;
66
+
67
+ length = ir->max_doc(ir);
68
+ if (length > 0) {
69
+ TRY
70
+ {
71
+ void *index;
72
+ tde = ir->term_docs(ir);
73
+ te = ir->terms(ir, field_num);
74
+ index = self->index = klass->create_index(length);
75
+ while (te->next(te)) {
76
+ tde->seek_te(tde, te);
77
+ klass->handle_term(index, tde, te->curr_term);
78
+ }
79
+ }
80
+ XFINALLY
81
+ tde->close(tde);
82
+ te->close(te);
83
+ XENDTRY
84
+ }
85
+ h_set(ir->field_index_cache, self, self);
86
+ }
87
+
88
+ return self;
89
+ }
90
+
91
+ /******************************************************************************
92
+ * ByteFieldIndex < FieldIndex
93
+ *
94
+ * The ByteFieldIndex holds an array of integers for each document in the
95
+ * index where the integer represents the sort value for the document. This
96
+ * index should only be used for sorting and not as a field cache of the
97
+ * column's value.
98
+ ******************************************************************************/
99
+ static void byte_handle_term(void *index_ptr,
100
+ TermDocEnum *tde,
101
+ const char *text)
102
+ {
103
+ long *index = (long *)index_ptr;
104
+ long val = index[-1]++;
105
+ (void)text;
106
+ while (tde->next(tde)) {
107
+ index[tde->doc_num(tde)] = val;
108
+ }
109
+ }
110
+
111
+ static void *byte_create_index(int size)
112
+ {
113
+ long *index = ALLOC_AND_ZERO_N(long, size + 1);
114
+ index[0] = 1;
115
+ return &index[1];
116
+ }
117
+
118
/* Free a byte index; steps back one element to reach the start of the
 * allocation made by byte_create_index. */
static void byte_destroy_index(void *p)
{
    long *slots = (long *)p;

    free(slots - 1);
}
123
+
124
+ const FieldIndexClass BYTE_FIELD_INDEX_CLASS = {
125
+ "byte",
126
+ &byte_create_index,
127
+ &byte_destroy_index,
128
+ &byte_handle_term
129
+ };
130
+
131
+ /******************************************************************************
132
+ * IntegerFieldIndex < FieldIndex
133
+ ******************************************************************************/
134
+ static void *integer_create_index(int size)
135
+ {
136
+ return ALLOC_AND_ZERO_N(long, size);
137
+ }
138
+
139
+ static void integer_handle_term(void *index_ptr,
140
+ TermDocEnum *tde,
141
+ const char *text)
142
+ {
143
+ long *index = (long *)index_ptr;
144
+ long val;
145
+ sscanf(text, "%ld", &val);
146
+ while (tde->next(tde)) {
147
+ index[tde->doc_num(tde)] = val;
148
+ }
149
+ }
150
+
151
+ const FieldIndexClass INTEGER_FIELD_INDEX_CLASS = {
152
+ "integer",
153
+ &integer_create_index,
154
+ &free,
155
+ &integer_handle_term
156
+ };
157
+
158
+ long get_integer_value(FieldIndex *field_index, long doc_num)
159
+ {
160
+ if (field_index->klass == &INTEGER_FIELD_INDEX_CLASS && doc_num >= 0) {
161
+ return ((long *)field_index->index)[doc_num];
162
+ }
163
+ return 0l;
164
+ }
165
+
166
+
167
+ /******************************************************************************
168
+ * FloatFieldIndex < FieldIndex
169
+ ******************************************************************************/
170
+ #define VALUES_ARRAY_START_SIZE 8
171
+ static void *float_create_index(int size)
172
+ {
173
+ return ALLOC_AND_ZERO_N(float, size);
174
+ }
175
+
176
+ static void float_handle_term(void *index_ptr,
177
+ TermDocEnum *tde,
178
+ const char *text)
179
+ {
180
+ float *index = (float *)index_ptr;
181
+ float val;
182
+ sscanf(text, "%g", &val);
183
+ while (tde->next(tde)) {
184
+ index[tde->doc_num(tde)] = val;
185
+ }
186
+ }
187
+
188
+ const FieldIndexClass FLOAT_FIELD_INDEX_CLASS = {
189
+ "float",
190
+ &float_create_index,
191
+ &free,
192
+ &float_handle_term
193
+ };
194
+
195
+ float get_float_value(FieldIndex *field_index, long doc_num)
196
+ {
197
+ if (field_index->klass == &FLOAT_FIELD_INDEX_CLASS && doc_num >= 0) {
198
+ return ((float *)field_index->index)[doc_num];
199
+ }
200
+ return 0.0f;
201
+ }
202
+
203
+ /******************************************************************************
204
+ * StringFieldIndex < FieldIndex
205
+ ******************************************************************************/
206
+
207
+ static void *string_create_index(int size)
208
+ {
209
+ StringIndex *self = ALLOC_AND_ZERO(StringIndex);
210
+ self->size = size;
211
+ self->index = ALLOC_AND_ZERO_N(long, size);
212
+ self->v_capa = VALUES_ARRAY_START_SIZE;
213
+ self->v_size = 1; /* leave the first value as NULL */
214
+ self->values = ALLOC_AND_ZERO_N(char *, VALUES_ARRAY_START_SIZE);
215
+ return self;
216
+ }
217
+
218
+ static void string_destroy_index(void *p)
219
+ {
220
+ StringIndex *self = (StringIndex *)p;
221
+ int i;
222
+ free(self->index);
223
+ for (i = 0; i < self->v_size; i++) {
224
+ free(self->values[i]);
225
+ }
226
+ free(self->values);
227
+ free(self);
228
+ }
229
+
230
+ static void string_handle_term(void *index_ptr,
231
+ TermDocEnum *tde,
232
+ const char *text)
233
+ {
234
+ StringIndex *index = (StringIndex *)index_ptr;
235
+ if (index->v_size >= index->v_capa) {
236
+ index->v_capa *= 2;
237
+ index->values = REALLOC_N(index->values, char *, index->v_capa);
238
+ }
239
+ index->values[index->v_size] = estrdup(text);
240
+ while (tde->next(tde)) {
241
+ index->index[tde->doc_num(tde)] = index->v_size;
242
+ }
243
+ index->v_size++;
244
+ }
245
+
246
+ const FieldIndexClass STRING_FIELD_INDEX_CLASS = {
247
+ "string",
248
+ &string_create_index,
249
+ &string_destroy_index,
250
+ &string_handle_term
251
+ };
252
+
253
+ const char *get_string_value(FieldIndex *field_index, long doc_num)
254
+ {
255
+ if (field_index->klass == &STRING_FIELD_INDEX_CLASS) {
256
+ StringIndex *string_index = (StringIndex *)field_index->index;
257
+ if (doc_num >= 0 && doc_num < string_index->size) {
258
+ return string_index->values[string_index->index[doc_num]];
259
+ }
260
+ }
261
+ return NULL;
262
+ }
data/ext/field_index.h ADDED
@@ -0,0 +1,52 @@
1
+ #ifndef FRT_FIELD_INDEX_H
2
+ #define FRT_FIELD_INDEX_H
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include "index.h"
9
+
10
+ /***************************************************************************
11
+ *
12
+ * FrtFieldIndex
13
+ *
14
+ ***************************************************************************/
15
+
16
/* Per-document string lookup table: index[doc] is a position in values. */
typedef struct FrtStringIndex {
    int     size;       /* number of entries in index */
    long   *index;      /* per-document position into values */
    char  **values;     /* stored value strings */
    int     v_size;     /* number of used entries in values */
    int     v_capa;     /* allocated capacity of values */
} FrtStringIndex;
23
+
24
+ typedef struct FrtFieldIndexClass {
25
+ const char *type;
26
+ void *(*create_index)(int size);
27
+ void (*destroy_index)(void *p);
28
+ void (*handle_term)(void *index, FrtTermDocEnum *tde, const char *text);
29
+ } FrtFieldIndexClass;
30
+
31
+ typedef struct FrtFieldIndex {
32
+ FrtSymbol field;
33
+ const FrtFieldIndexClass *klass;
34
+ void *index;
35
+ } FrtFieldIndex;
36
+
37
+ extern const FrtFieldIndexClass FRT_INTEGER_FIELD_INDEX_CLASS;
38
+ extern const FrtFieldIndexClass FRT_FLOAT_FIELD_INDEX_CLASS;
39
+ extern const FrtFieldIndexClass FRT_STRING_FIELD_INDEX_CLASS;
40
+ extern const FrtFieldIndexClass FRT_BYTE_FIELD_INDEX_CLASS;
41
+
42
+ extern FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, FrtSymbol field,
43
+ const FrtFieldIndexClass *klass);
44
+ extern const char *frt_get_string_value(FrtFieldIndex *field_index, long doc_num);
45
+ extern float frt_get_float_value(FrtFieldIndex *field_index, long doc_num);
46
+ extern long frt_get_integer_value(FrtFieldIndex *field_index, long doc_num);
47
+
48
+ #ifdef __cplusplus
49
+ } // extern "C"
50
+ #endif
51
+
52
+ #endif
data/ext/filter.c ADDED
@@ -0,0 +1,157 @@
1
+ #include "search.h"
2
+ #include "symbol.h"
3
+ #include <string.h>
4
+ #include "internal.h"
5
+
6
+ /***************************************************************************
7
+ *
8
+ * Filter
9
+ *
10
+ ***************************************************************************/
11
+
12
+ void filt_destroy_i(Filter *filt)
13
+ {
14
+ h_destroy(filt->cache);
15
+ free(filt);
16
+ }
17
+ void filt_deref(Filter *filt)
18
+ {
19
+ if (--(filt->ref_cnt) == 0) {
20
+ filt->destroy_i(filt);
21
+ }
22
+ }
23
+
24
+ BitVector *filt_get_bv(Filter *filt, IndexReader *ir)
25
+ {
26
+ CacheObject *co = (CacheObject *)h_get(filt->cache, ir);
27
+
28
+ if (!co) {
29
+ BitVector *bv;
30
+ if (!ir->cache) {
31
+ ir_add_cache(ir);
32
+ }
33
+ bv = filt->get_bv_i(filt, ir);
34
+ co = co_create(filt->cache, ir->cache, filt, ir,
35
+ (free_ft)&bv_destroy, (void *)bv);
36
+ }
37
+ return (BitVector *)co->obj;
38
+ }
39
+
40
+ static char *filt_to_s_i(Filter *filt)
41
+ {
42
+ return estrdup(S(filt->name));
43
+ }
44
+
45
+ static unsigned long filt_hash_default(Filter *filt)
46
+ {
47
+ (void)filt;
48
+ return 0;
49
+ }
50
+
51
+ static int filt_eq_default(Filter *filt, Filter *o)
52
+ {
53
+ (void)filt; (void)o;
54
+ return false;
55
+ }
56
+
57
+ Filter *filt_create(size_t size, Symbol name)
58
+ {
59
+ Filter *filt = (Filter *)emalloc(size);
60
+ filt->cache = co_hash_create();
61
+ filt->name = name;
62
+ filt->to_s = &filt_to_s_i;
63
+ filt->hash = &filt_hash_default;
64
+ filt->eq = &filt_eq_default;
65
+ filt->destroy_i = &filt_destroy_i;
66
+ filt->ref_cnt = 1;
67
+ return filt;
68
+ }
69
+
70
+ unsigned long filt_hash(Filter *filt)
71
+ {
72
+ return sym_hash(filt->name) ^ filt->hash(filt);
73
+ }
74
+
75
+ int filt_eq(Filter *filt, Filter *o)
76
+ {
77
+ return ((filt == o)
78
+ || ((filt->name == o->name)
79
+ && (filt->eq == o->eq)
80
+ && (filt->eq(filt, o))));
81
+ }
82
+
83
+ /***************************************************************************
84
+ *
85
+ * QueryFilter
86
+ *
87
+ ***************************************************************************/
88
+
89
+ #define QF(filt) ((QueryFilter *)(filt))
90
+ typedef struct QueryFilter
91
+ {
92
+ Filter super;
93
+ Query *query;
94
+ } QueryFilter;
95
+
96
+ static char *qfilt_to_s(Filter *filt)
97
+ {
98
+ Query *query = QF(filt)->query;
99
+ char *query_str = query->to_s(query, NULL);
100
+ char *filter_str = strfmt("QueryFilter< %s >", query_str);
101
+ free(query_str);
102
+ return filter_str;
103
+ }
104
+
105
+ static BitVector *qfilt_get_bv_i(Filter *filt, IndexReader *ir)
106
+ {
107
+ BitVector *bv = bv_new_capa(ir->max_doc(ir));
108
+ Searcher *sea = isea_new(ir);
109
+ Weight *weight = q_weight(QF(filt)->query, sea);
110
+ Scorer *scorer = weight->scorer(weight, ir);
111
+ if (scorer) {
112
+ while (scorer->next(scorer)) {
113
+ bv_set(bv, scorer->doc);
114
+ }
115
+ scorer->destroy(scorer);
116
+ }
117
+ weight->destroy(weight);
118
+ free(sea);
119
+ return bv;
120
+ }
121
+
122
+ static unsigned long qfilt_hash(Filter *filt)
123
+ {
124
+ return q_hash(QF(filt)->query);
125
+ }
126
+
127
+ static int qfilt_eq(Filter *filt, Filter *o)
128
+ {
129
+ return q_eq(QF(filt)->query, QF(o)->query);
130
+ }
131
+
132
+ static void qfilt_destroy_i(Filter *filt)
133
+ {
134
+ Query *query = QF(filt)->query;
135
+ q_deref(query);
136
+ filt_destroy_i(filt);
137
+ }
138
+
139
+ Filter *qfilt_new_nr(Query *query)
140
+ {
141
+ Filter *filt = filt_new(QueryFilter);
142
+
143
+ QF(filt)->query = query;
144
+
145
+ filt->get_bv_i = &qfilt_get_bv_i;
146
+ filt->hash = &qfilt_hash;
147
+ filt->eq = &qfilt_eq;
148
+ filt->to_s = &qfilt_to_s;
149
+ filt->destroy_i = &qfilt_destroy_i;
150
+ return filt;
151
+ }
152
+
153
+ Filter *qfilt_new(Query *query)
154
+ {
155
+ REF(query);
156
+ return qfilt_new_nr(query);
157
+ }