ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/r_index_io.c DELETED
@@ -1,1021 +0,0 @@
1
- #include "ferret.h"
2
- #include "index.h"
3
-
4
- VALUE cTVOffsetInfo;
5
- VALUE cTermVector;
6
- VALUE cTermDocEnum;
7
- VALUE cIndexWriter;
8
- VALUE cIndexReader;
9
- VALUE cTermEnum;
10
-
11
- VALUE ranalyzer_key;
12
- VALUE rclose_dir_key;
13
- VALUE rcreate_key;
14
- VALUE rcreate_if_missing_key;
15
- VALUE ruse_compound_file_key;
16
- VALUE rmerge_factor_key;
17
- VALUE rmin_merge_docs_key;
18
- VALUE rmax_merge_docs_key;
19
- VALUE rmax_field_length_key;
20
- VALUE rterm_index_interval_key;
21
-
22
- extern void frt_set_term(VALUE rterm, Term *t);
23
- extern VALUE frt_get_rterm(char *field, char *text);
24
- extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
25
-
26
- /****************************************************************************
27
- *
28
- * TermEnum Methods
29
- *
30
- ****************************************************************************/
31
-
32
- static void
33
- frt_te_free(void *p)
34
- {
35
- TermEnum *te = (TermEnum *)p;
36
- te->close(te);
37
- }
38
-
39
- #define GET_TE TermEnum *te = (TermEnum *)DATA_PTR(self)
40
- static VALUE
41
- frt_te_next(VALUE self)
42
- {
43
- GET_TE;
44
- return te->next(te) ? Qtrue : Qfalse;
45
- }
46
-
47
- static VALUE
48
- frt_te_term(VALUE self)
49
- {
50
- GET_TE;
51
- if (!te->tb_curr) return Qnil;
52
- return frt_get_rterm(te->tb_curr->field, te->tb_curr->text);
53
- }
54
-
55
- static VALUE
56
- frt_te_doc_freq(VALUE self)
57
- {
58
- GET_TE;
59
- if (!te->tb_curr) return Qnil;
60
- return INT2FIX(te->ti_curr->doc_freq);
61
- }
62
-
63
- static VALUE
64
- frt_te_close(VALUE self)
65
- {
66
- GET_TE;
67
- Frt_Unwrap_Struct(self);
68
- te->close(te);
69
- return Qnil;
70
- }
71
-
72
- static VALUE
73
- frt_te_skip_to(VALUE self, VALUE rterm)
74
- {
75
- GET_TE;
76
- Term t;
77
- frt_set_term(rterm, &t);
78
-
79
- return te_skip_to(te, &t) ? Qtrue : Qfalse;
80
- }
81
-
82
- /****************************************************************************
83
- *
84
- * TermVectorOffsetInfo Methods
85
- *
86
- ****************************************************************************/
87
-
88
- void
89
- frt_tvoi_free(void *p)
90
- {
91
- object_del(p);
92
- tvoi_destroy(p);
93
- }
94
-
95
- static VALUE
96
- frt_tvoi_init(VALUE self, VALUE rstart, VALUE rend)
97
- {
98
- TVOffsetInfo *tvoi = tvoi_create(FIX2INT(rstart), FIX2INT(rend));
99
- Frt_Wrap_Struct(self, NULL, &frt_tvoi_free, tvoi);
100
- object_add(tvoi, self);
101
- return self;
102
- }
103
-
104
- #define GET_TVOI TVOffsetInfo *tvoi = (TVOffsetInfo *)DATA_PTR(self)
105
-
106
- static VALUE
107
- frt_tvoi_set_start(VALUE self, VALUE rstart)
108
- {
109
- GET_TVOI;
110
- tvoi->start = FIX2INT(rstart);
111
- return Qnil;
112
- }
113
-
114
- static VALUE
115
- frt_tvoi_get_start(VALUE self)
116
- {
117
- GET_TVOI;
118
- return INT2FIX(tvoi->start);
119
- }
120
-
121
- static VALUE
122
- frt_tvoi_set_end(VALUE self, VALUE rend)
123
- {
124
- GET_TVOI;
125
- tvoi->end = FIX2INT(rend);
126
- return Qnil;
127
- }
128
-
129
- static VALUE
130
- frt_tvoi_get_end(VALUE self)
131
- {
132
- GET_TVOI;
133
- return INT2FIX(tvoi->end);
134
- }
135
-
136
- static VALUE
137
- frt_tvoi_eql(VALUE self, VALUE rother)
138
- {
139
- GET_TVOI;
140
- TVOffsetInfo *other;
141
- if (TYPE(rother) != T_DATA) return Qfalse;
142
- Data_Get_Struct(rother, TVOffsetInfo, other);
143
-
144
- return ((tvoi->start == other->start) && (tvoi->end == other->end))
145
- ? Qtrue : Qfalse;
146
- }
147
-
148
- static VALUE
149
- frt_tvoi_hash(VALUE self, VALUE rother)
150
- {
151
- GET_TVOI;
152
- return INT2FIX(29 * tvoi->start + tvoi->end);
153
- }
154
-
155
- static VALUE
156
- frt_tvoi_to_s(VALUE self)
157
- {
158
- char buf[60];
159
- GET_TVOI;
160
- sprintf(buf, "TermVectorOffsetInfo(%d:%d)", tvoi->start, tvoi->end);
161
- return rb_str_new2(buf);
162
- }
163
-
164
- /****************************************************************************
165
- *
166
- * TermVector Methods
167
- *
168
- ****************************************************************************/
169
-
170
- void
171
- frt_tv_free(void *p)
172
- {
173
- int i;
174
- TermVector *tv = (TermVector *)p;
175
- for (i = 0; i < tv->tcnt; i++) {
176
- free(tv->terms[i]);
177
- }
178
- free(tv->terms);
179
- if (tv->positions) {
180
- for (i = 0; i < tv->tcnt; i++) {
181
- free(tv->positions[i]);
182
- }
183
- free(tv->positions);
184
- }
185
- if (tv->offsets) {
186
- for (i = 0; i < tv->tcnt; i++) {
187
- free(tv->offsets[i]);
188
- }
189
- free(tv->offsets);
190
- }
191
- free(tv->freqs);
192
- object_del(p);
193
- free(p);
194
- }
195
-
196
- void
197
- frt_tv_mark(void *p)
198
- {
199
- int i, j;
200
- TermVector *tv = (TermVector *)p;
201
- if (tv->offsets != NULL) {
202
- for (i = 0; i < tv->tcnt; i++) {
203
- for (j = 0; j < tv->freqs[i]; j++) {
204
- frt_gc_mark(tv->offsets[i][j]);
205
- }
206
- }
207
- }
208
- }
209
-
210
- static VALUE
211
- frt_get_tv(TermVector *tv)
212
- {
213
- VALUE self = Qnil;
214
- if (tv) {
215
- self = object_get(tv);
216
- if (self == Qnil) {
217
- self = Data_Wrap_Struct(cTermVector, &frt_tv_mark, &frt_tv_free, tv);
218
- if (tv->offsets) {
219
- TVOffsetInfo *tvoi;
220
- VALUE rtvoi;
221
- int i, j;
222
- for (i = 0; i < tv->tcnt; i++) {
223
- for (j = 0; j < tv->freqs[i]; j++) {
224
- tvoi = tv->offsets[i][j];
225
- if (object_get(tvoi) == Qnil) {
226
- rtvoi = Data_Wrap_Struct(cTVOffsetInfo, NULL, &frt_tvoi_free, tvoi);
227
- object_add(tvoi, rtvoi);
228
- }
229
- }
230
- }
231
- }
232
- object_add(tv, self);
233
- }
234
- }
235
- return self;
236
- }
237
-
238
- #define GET_TV TermVector *tv = (TermVector *)DATA_PTR(self)
239
-
240
- static VALUE
241
- frt_tv_get_field(VALUE self)
242
- {
243
- GET_TV;
244
- return rb_str_new2(tv->field);
245
- }
246
-
247
- static VALUE
248
- frt_tv_get_terms(VALUE self)
249
- {
250
- int i;
251
- GET_TV;
252
- VALUE rterms = rb_ary_new2(tv->tcnt);
253
- for (i = 0; i < tv->tcnt; i++) {
254
- rb_ary_push(rterms, rb_str_new2(tv->terms[i]));
255
- }
256
- return rterms;
257
- }
258
-
259
- static VALUE
260
- frt_tv_get_freqs(VALUE self)
261
- {
262
- int i;
263
- GET_TV;
264
- VALUE rfreqs = rb_ary_new2(tv->tcnt);
265
- for (i = 0; i < tv->tcnt; i++) {
266
- rb_ary_push(rfreqs, INT2FIX(tv->freqs[i]));
267
- }
268
- return rfreqs;
269
- }
270
-
271
- static VALUE
272
- frt_tv_get_positions(VALUE self)
273
- {
274
- int i, j, freq;
275
- GET_TV;
276
- VALUE rpositions, rpositionss;
277
-
278
- if (!tv->positions) return Qnil;
279
- rpositionss = rb_ary_new2(tv->tcnt);
280
- for (i = 0; i < tv->tcnt; i++) {
281
- freq = tv->freqs[i];
282
- rpositions = rb_ary_new2(freq);
283
- for (j = 0; j < freq; j++) {
284
- rb_ary_push(rpositions, INT2FIX(tv->positions[i][j]));
285
- }
286
- rb_ary_push(rpositionss, rpositions);
287
- }
288
- return rpositionss;
289
- }
290
-
291
- static VALUE
292
- frt_tv_get_offsets(VALUE self)
293
- {
294
- int i, j, freq;
295
- GET_TV;
296
- VALUE roffsetss, roffsets, roffset;
297
- if (!tv->offsets) return Qnil;
298
- roffsetss = rb_ary_new2(tv->tcnt);
299
-
300
- for (i = 0; i < tv->tcnt; i++) {
301
- freq = tv->freqs[i];
302
- roffsets = rb_ary_new2(freq);
303
- for (j = 0; j < freq; j++) {
304
- roffset = object_get(tv->offsets[i][j]);
305
- rb_ary_push(roffsets, roffset);
306
- }
307
- rb_ary_push(roffsetss, roffsets);
308
- }
309
- return roffsetss;
310
- }
311
-
312
- /****************************************************************************
313
- *
314
- * TermDocEnum Methods
315
- *
316
- ****************************************************************************/
317
-
318
- void
319
- frt_tde_free(void *p)
320
- {
321
- TermDocEnum *tde = (TermDocEnum *)p;
322
- tde->close(tde);
323
- }
324
-
325
- static VALUE
326
- frt_get_tde(TermDocEnum *tde)
327
- {
328
- return Data_Wrap_Struct(cTermDocEnum, NULL, &frt_tde_free, tde);
329
- }
330
-
331
- #define GET_TDE TermDocEnum *tde = (TermDocEnum *)DATA_PTR(self)
332
-
333
- static VALUE
334
- frt_tde_close(VALUE self)
335
- {
336
- GET_TDE;
337
- Frt_Unwrap_Struct(self);
338
- tde->close(tde);
339
- return Qnil;
340
- }
341
-
342
- static VALUE
343
- frt_tde_seek(VALUE self, VALUE rterm)
344
- {
345
- GET_TDE;
346
- Term t;
347
- frt_set_term(rterm, &t);
348
- tde->seek(tde, &t);
349
- return Qnil;
350
- }
351
-
352
- static VALUE
353
- frt_tde_doc(VALUE self)
354
- {
355
- GET_TDE;
356
- return INT2FIX(tde->doc_num(tde));
357
- }
358
-
359
- static VALUE
360
- frt_tde_freq(VALUE self)
361
- {
362
- GET_TDE;
363
- return INT2FIX(tde->freq(tde));
364
- }
365
-
366
- static VALUE
367
- frt_tde_next(VALUE self)
368
- {
369
- GET_TDE;
370
- return tde->next(tde) ? Qtrue : Qfalse;
371
- }
372
-
373
- static VALUE
374
- frt_tde_next_position(VALUE self)
375
- {
376
- GET_TDE;
377
- return INT2FIX(tde->next_position(tde));
378
- }
379
-
380
- static VALUE
381
- frt_tde_read(VALUE self, VALUE rdocs, VALUE rfreqs)
382
- {
383
- int i, req_num, cnt;
384
- GET_TDE;
385
- Check_Type(rdocs, T_ARRAY);
386
- Check_Type(rfreqs, T_ARRAY);
387
- req_num = MIN(RARRAY(rdocs)->len, RARRAY(rfreqs)->len);
388
- cnt = tde->read(tde, (int *)RARRAY(rdocs)->ptr,
389
- (int *)RARRAY(rfreqs)->ptr, req_num);
390
- for (i = 0; i < cnt; i++) {
391
- RARRAY(rdocs)->ptr[i] = INT2FIX(RARRAY(rdocs)->ptr[i]);
392
- RARRAY(rfreqs)->ptr[i] = INT2FIX(RARRAY(rfreqs)->ptr[i]);
393
- }
394
- return INT2FIX(cnt);
395
- }
396
-
397
- static VALUE
398
- frt_tde_skip_to(VALUE self, VALUE rtarget)
399
- {
400
- GET_TDE;
401
- return tde->skip_to(tde, FIX2INT(rtarget)) ? Qtrue : Qfalse;
402
- }
403
-
404
- /****************************************************************************
405
- *
406
- * IndexWriter Methods
407
- *
408
- ****************************************************************************/
409
-
410
- void
411
- frt_iw_free(void *p)
412
- {
413
- IndexWriter *iw = (IndexWriter *)p;
414
- iw_close(iw);
415
- }
416
-
417
- void
418
- frt_iw_mark(void *p)
419
- {
420
- IndexWriter *iw = (IndexWriter *)p;
421
- frt_gc_mark(iw->analyzer);
422
- frt_gc_mark(iw->store);
423
- }
424
-
425
- #define SET_INT_ATTR(attr) \
426
- if (RTEST(rval = rb_hash_aref(roptions, r##attr##_key)))\
427
- iw->attr = FIX2INT(rval);
428
-
429
- static VALUE
430
- frt_iw_init(int argc, VALUE *argv, VALUE self)
431
- {
432
- VALUE rdir, roptions, rval;
433
- bool create = false;
434
- bool use_compound_file = true;
435
- Store *store;
436
- Analyzer *analyzer = NULL;
437
- IndexWriter *iw;
438
- rb_scan_args(argc, argv, "02", &rdir, &roptions);
439
- if (argc > 0) {
440
- if (TYPE(rdir) == T_DATA) {
441
- store = DATA_PTR(rdir);
442
- ref(store);
443
- } else {
444
- StringValue(rdir);
445
- frt_create_dir(rdir);
446
- store = open_fs_store(RSTRING(rdir)->ptr);
447
- }
448
- } else {
449
- store = open_ram_store();
450
- }
451
- if (argc == 2) {
452
- Check_Type(roptions, T_HASH);
453
- /* Let ruby's GC handle the closing of the store
454
- if (!close_dir) {
455
- close_dir = RTEST(rb_hash_aref(roptions, rclose_dir_key));
456
- }
457
- */
458
- /* use_compound_file defaults to true */
459
- use_compound_file =
460
- (rb_hash_aref(roptions, ruse_compound_file_key) == Qfalse) ? false : true;
461
-
462
- rval = rb_hash_aref(roptions, ranalyzer_key);
463
- if (rval == Qnil) {
464
- analyzer = mb_standard_analyzer_create(true);
465
- } else {
466
- analyzer = frt_get_cwrapped_analyzer(rval);
467
- }
468
- create = RTEST(rb_hash_aref(roptions, rcreate_key));
469
- if (!create && RTEST(rb_hash_aref(roptions, rcreate_if_missing_key))) {
470
- if (!store->exists(store, "segments")) {
471
- create = true;
472
- }
473
- }
474
- }
475
- iw = iw_open(store, analyzer, create);
476
- store_deref(store);
477
- iw->use_compound_file = use_compound_file;
478
-
479
- SET_INT_ATTR(merge_factor);
480
- SET_INT_ATTR(min_merge_docs);
481
- SET_INT_ATTR(max_merge_docs);
482
- SET_INT_ATTR(max_field_length);
483
- SET_INT_ATTR(term_index_interval);
484
-
485
- Frt_Wrap_Struct(self, &frt_iw_mark, &frt_iw_free, iw);
486
- return self;
487
- }
488
-
489
- #define GET_IW IndexWriter *iw = (IndexWriter *)DATA_PTR(self)
490
-
491
- static VALUE
492
- frt_iw_close(VALUE self)
493
- {
494
- GET_IW;
495
- Frt_Unwrap_Struct(self);
496
- iw_close(iw);
497
- return Qnil;
498
- }
499
-
500
- static VALUE
501
- frt_iw_add_doc(VALUE self, VALUE rdoc)
502
- {
503
- GET_IW;
504
- Document *doc;
505
- Data_Get_Struct(rdoc, Document, doc);
506
- iw_add_doc(iw, doc);
507
- return Qnil;
508
- }
509
-
510
- static VALUE
511
- frt_iw_set_merge_factor(VALUE self, VALUE val)
512
- {
513
- GET_IW;
514
- iw->merge_factor = FIX2INT(val);
515
- return Qnil;
516
- }
517
-
518
- static VALUE
519
- frt_iw_set_min_merge_docs(VALUE self, VALUE val)
520
- {
521
- GET_IW;
522
- iw->min_merge_docs = FIX2INT(val);
523
- return Qnil;
524
- }
525
-
526
- static VALUE
527
- frt_iw_set_max_merge_docs(VALUE self, VALUE val)
528
- {
529
- GET_IW;
530
- iw->max_merge_docs = FIX2INT(val);
531
- return Qnil;
532
- }
533
-
534
- static VALUE
535
- frt_iw_set_max_field_length(VALUE self, VALUE val)
536
- {
537
- GET_IW;
538
- iw->max_field_length = FIX2INT(val);
539
- return Qnil;
540
- }
541
-
542
- static VALUE
543
- frt_iw_set_term_index_interval(VALUE self, VALUE val)
544
- {
545
- GET_IW;
546
- iw->term_index_interval = FIX2INT(val);
547
- return Qnil;
548
- }
549
-
550
- static VALUE
551
- frt_iw_set_use_compound_file(VALUE self, VALUE val)
552
- {
553
- GET_IW;
554
- iw->use_compound_file = FIX2INT(val);
555
- return Qnil;
556
- }
557
-
558
- static VALUE
559
- frt_iw_get_doc_count(VALUE self)
560
- {
561
- GET_IW;
562
- return INT2FIX(iw_doc_count(iw));
563
- }
564
-
565
- static VALUE
566
- frt_iw_get_merge_factor(VALUE self)
567
- {
568
- GET_IW;
569
- return INT2FIX(iw->merge_factor);
570
- }
571
-
572
- static VALUE
573
- frt_iw_get_min_merge_docs(VALUE self)
574
- {
575
- GET_IW;
576
- return INT2FIX(iw->min_merge_docs);
577
- }
578
-
579
- static VALUE
580
- frt_iw_get_max_merge_docs(VALUE self)
581
- {
582
- GET_IW;
583
- return INT2FIX(iw->max_merge_docs);
584
- }
585
-
586
- static VALUE
587
- frt_iw_get_max_field_length(VALUE self)
588
- {
589
- GET_IW;
590
- return INT2FIX(iw->max_field_length);
591
- }
592
-
593
- static VALUE
594
- frt_iw_get_term_index_interval(VALUE self)
595
- {
596
- GET_IW;
597
- return INT2FIX(iw->term_index_interval);
598
- }
599
-
600
- static VALUE
601
- frt_iw_get_use_compound_file(VALUE self)
602
- {
603
- GET_IW;
604
- return INT2FIX(iw->use_compound_file);
605
- }
606
-
607
- static VALUE
608
- frt_iw_optimize(VALUE self)
609
- {
610
- GET_IW;
611
- iw_optimize(iw);
612
- return Qnil;
613
- }
614
-
615
- /****************************************************************************
616
- *
617
- * IndexReader Methods
618
- *
619
- ****************************************************************************/
620
-
621
- void
622
- frt_ir_free(void *p)
623
- {
624
- object_del(p);
625
- ir_close((IndexReader *)p);
626
- }
627
-
628
- void
629
- frt_ir_mark(void *p)
630
- {
631
- IndexReader *ir = (IndexReader *)p;
632
- frt_gc_mark(ir->store);
633
- }
634
-
635
- static VALUE
636
- frt_ir_init(int argc, VALUE *argv, VALUE self)
637
- {
638
- VALUE rdir, rclose_dir;
639
- //bool close_dir = false;
640
- Store *store = NULL;
641
- IndexReader *ir;
642
- switch (rb_scan_args(argc, argv, "11", &rdir, &rclose_dir)) {
643
- case 2: //close_dir = RTEST(rclose_dir);
644
- case 1:
645
- if (TYPE(rdir) == T_DATA) {
646
- store = DATA_PTR(rdir);
647
- } else {
648
- rdir = rb_obj_as_string(rdir);
649
- frt_create_dir(rdir);
650
- store = open_fs_store(RSTRING(rdir)->ptr);
651
- deref(store);
652
- }
653
- }
654
- ir = ir_open(store);
655
- Frt_Wrap_Struct(self, &frt_ir_mark, &frt_ir_free, ir);
656
- object_add(ir, self);
657
- return self;
658
- }
659
-
660
- static VALUE
661
- frt_ir_open(int argc, VALUE *argv, VALUE klass)
662
- {
663
- VALUE self = Frt_Make_Struct(klass);
664
- return frt_ir_init(argc, argv, self);
665
- }
666
-
667
- #define GET_IR IndexReader *ir = (IndexReader *)DATA_PTR(self)
668
-
669
- static VALUE
670
- frt_ir_set_norm(VALUE self, VALUE rdoc_num, VALUE rfield, VALUE rval)
671
- {
672
- GET_IR;
673
- rfield = rb_obj_as_string(rfield);
674
- ir_set_norm(ir, FIX2INT(rdoc_num), RSTRING(rfield)->ptr, NUM2CHR(rval));
675
- return Qnil;
676
- }
677
-
678
- static VALUE
679
- frt_ir_get_norms(VALUE self, VALUE rfield)
680
- {
681
- GET_IR;
682
- uchar *norms;
683
- rfield = rb_obj_as_string(rfield);
684
- norms = ir->get_norms(ir, RSTRING(rfield)->ptr);
685
- if (norms) {
686
- return rb_str_new((char *)norms, ir->max_doc(ir));
687
- } else {
688
- return Qnil;
689
- }
690
- }
691
-
692
- static VALUE
693
- frt_ir_get_norms_into(VALUE self, VALUE rfield, VALUE rnorms, VALUE roffset)
694
- {
695
- GET_IR;
696
- int offset;
697
- rfield = rb_obj_as_string(rfield);
698
- offset = FIX2INT(roffset);
699
- Check_Type(rnorms, T_STRING);
700
- if (RSTRING(rnorms)->len < offset + ir->max_doc(ir)) {
701
- rb_raise(rb_eArgError, "supplied a string of length:%d to IndexReader#get_norms_into but needed a string of length offset:%d + maxdoc:%d", RSTRING(rnorms)->len, offset, ir->max_doc(ir));
702
- }
703
-
704
- ir->get_norms_into(ir, RSTRING(rfield)->ptr, (uchar *)RSTRING(rnorms)->ptr, offset);
705
- return Qnil;
706
- }
707
-
708
- static VALUE
709
- frt_ir_commit(VALUE self)
710
- {
711
- GET_IR;
712
- ir_commit(ir);
713
- return Qnil;
714
- }
715
-
716
- static VALUE
717
- frt_ir_close(VALUE self)
718
- {
719
- GET_IR;
720
- object_del(ir);
721
- Frt_Unwrap_Struct(self);
722
- ir_close(ir);
723
- return Qnil;
724
- }
725
-
726
- static VALUE
727
- frt_ir_has_deletions(VALUE self)
728
- {
729
- GET_IR;
730
- return ir->has_deletions(ir) ? Qtrue : Qfalse;
731
- }
732
-
733
- static VALUE
734
- frt_ir_delete(VALUE self, VALUE rdoc_num)
735
- {
736
- GET_IR;
737
- int doc_num = FIX2INT(rdoc_num);
738
- ir_delete_doc(ir, doc_num);
739
- return Qnil;
740
- }
741
-
742
- static VALUE
743
- frt_ir_is_deleted(VALUE self, VALUE rdoc_num)
744
- {
745
- GET_IR;
746
- int doc_num = FIX2INT(rdoc_num);
747
- return ir->is_deleted(ir, doc_num) ? Qtrue : Qfalse;
748
- }
749
-
750
- static VALUE
751
- frt_ir_max_doc(VALUE self)
752
- {
753
- GET_IR;
754
- return INT2FIX(ir->max_doc(ir));
755
- }
756
-
757
- static VALUE
758
- frt_ir_num_docs(VALUE self)
759
- {
760
- GET_IR;
761
- return INT2FIX(ir->num_docs(ir));
762
- }
763
-
764
- static VALUE
765
- frt_ir_undelete_all(VALUE self)
766
- {
767
- GET_IR;
768
- ir_undelete_all(ir);
769
- return Qnil;
770
- }
771
-
772
- static VALUE
773
- frt_ir_get_doc(VALUE self, VALUE rdoc_num)
774
- {
775
- GET_IR;
776
- Document *doc = ir->get_doc(ir, FIX2INT(rdoc_num));
777
- return frt_get_doc(doc);
778
- }
779
-
780
- static VALUE
781
- frt_ir_is_latest(VALUE self)
782
- {
783
- GET_IR;
784
- return ir_is_latest(ir) ? Qtrue : Qfalse;
785
- }
786
-
787
- static VALUE
788
- frt_ir_get_term_vector(VALUE self, VALUE rdoc_num, VALUE rfield)
789
- {
790
- GET_IR;
791
- TermVector *tv;
792
- rfield = rb_obj_as_string(rfield);
793
- tv = ir->get_term_vector(ir, FIX2INT(rdoc_num), RSTRING(rfield)->ptr);
794
- return frt_get_tv(tv);
795
- }
796
-
797
- static VALUE
798
- frt_ir_get_term_vectors(VALUE self, VALUE rdoc_num)
799
- {
800
- int i;
801
- GET_IR;
802
- Array *tvs = ir->get_term_vectors(ir, FIX2INT(rdoc_num));
803
- VALUE rtvs = rb_ary_new2(tvs->size);
804
- VALUE rtv;
805
- for (i = 0; i < tvs->size; i++) {
806
- rtv = frt_get_tv(tvs->elems[i]);
807
- rb_ary_push(rtvs, rtv);
808
- }
809
- tvs->free_elem = NULL;
810
- ary_destroy(tvs);
811
-
812
- return rtvs;
813
- }
814
-
815
- static VALUE
816
- frt_ir_term_docs(VALUE self)
817
- {
818
- GET_IR;
819
- return frt_get_tde(ir->term_docs(ir));
820
- }
821
-
822
- static VALUE
823
- frt_ir_term_docs_for(VALUE self, VALUE rterm)
824
- {
825
- GET_IR;
826
- Term t;
827
- frt_set_term(rterm, &t);
828
- return frt_get_tde(ir_term_docs_for(ir, &t));
829
- }
830
-
831
- static VALUE
832
- frt_ir_term_positions(VALUE self)
833
- {
834
- GET_IR;
835
- return frt_get_tde(ir->term_positions(ir));
836
- }
837
-
838
- static VALUE
839
- frt_ir_term_positions_for(VALUE self, VALUE rterm)
840
- {
841
- GET_IR;
842
- Term t;
843
- frt_set_term(rterm, &t);
844
- return frt_get_tde(ir_term_positions_for(ir, &t));
845
- }
846
-
847
- static VALUE
848
- frt_ir_doc_freq(VALUE self, VALUE rterm)
849
- {
850
- GET_IR;
851
- Term t;
852
- frt_set_term(rterm, &t);
853
- return INT2FIX(ir->doc_freq(ir, &t));
854
- }
855
-
856
- static VALUE
857
- frt_ir_terms(VALUE self)
858
- {
859
- TermEnum *te;
860
- GET_IR;
861
- te = ir->terms(ir);
862
- return Data_Wrap_Struct(cTermEnum, NULL, &frt_te_free, te);
863
- }
864
-
865
- static VALUE
866
- frt_ir_terms_from(VALUE self, VALUE rterm)
867
- {
868
- TermEnum *te;
869
- Term t;
870
- GET_IR;
871
- frt_set_term(rterm, &t);
872
- te = ir->terms_from(ir, &t);
873
- return Data_Wrap_Struct(cTermEnum, NULL, &frt_te_free, te);
874
- }
875
-
876
- static VALUE
877
- frt_ir_get_field_names(VALUE self)
878
- {
879
- GET_IR;
880
- VALUE rfnames;
881
- HashSet *fnames = ir->get_field_names(ir, IR_ALL);
882
- rfnames = frt_hs_to_rb_ary(fnames);
883
- hs_destroy(fnames);
884
- return rfnames;
885
- }
886
-
887
- /****************************************************************************
888
- *
889
- * Init Function
890
- *
891
- ****************************************************************************/
892
-
893
- void
894
- Init_index_io(void)
895
- {
896
- ranalyzer_key = ID2SYM(rb_intern("analyzer"));
897
- rclose_dir_key = ID2SYM(rb_intern("close_dir"));
898
- rcreate_key = ID2SYM(rb_intern("create"));
899
- rcreate_if_missing_key = ID2SYM(rb_intern("create_if_missing"));
900
- ruse_compound_file_key = ID2SYM(rb_intern("use_compound_file"));
901
- rmerge_factor_key = ID2SYM(rb_intern("merge_factor"));
902
- rmin_merge_docs_key = ID2SYM(rb_intern("min_merge_docs"));
903
- rmax_merge_docs_key = ID2SYM(rb_intern("max_merge_docs"));
904
- rmax_field_length_key = ID2SYM(rb_intern("max_field_length"));
905
- rterm_index_interval_key = ID2SYM(rb_intern("term_index_interval"));
906
-
907
- /* TermEnum */
908
- cTermEnum = rb_define_class_under(mIndex, "TermEnum", rb_cObject);
909
- rb_define_alloc_func(cTermEnum, frt_data_alloc);
910
-
911
- rb_define_method(cTermEnum, "next?", frt_te_next, 0);
912
- rb_define_method(cTermEnum, "term", frt_te_term, 0);
913
- rb_define_method(cTermEnum, "doc_freq", frt_te_doc_freq, 0);
914
- rb_define_method(cTermEnum, "skip_to", frt_te_skip_to, 1);
915
- rb_define_method(cTermEnum, "close", frt_te_close, 0);
916
-
917
- /* TermVectorOffsetInfo */
918
- cTVOffsetInfo = rb_define_class_under(mIndex, "TermVectorOffsetInfo", rb_cObject);
919
- rb_define_alloc_func(cTVOffsetInfo, frt_data_alloc);
920
-
921
- rb_define_method(cTVOffsetInfo, "initialize", frt_tvoi_init, 2);
922
- rb_define_method(cTVOffsetInfo, "start=", frt_tvoi_set_start, 1);
923
- rb_define_method(cTVOffsetInfo, "start", frt_tvoi_get_start, 0);
924
- rb_define_method(cTVOffsetInfo, "end=", frt_tvoi_set_end, 1);
925
- rb_define_method(cTVOffsetInfo, "end", frt_tvoi_get_end, 0);
926
- rb_define_method(cTVOffsetInfo, "eql?", frt_tvoi_eql, 1);
927
- rb_define_method(cTVOffsetInfo, "==", frt_tvoi_eql, 1);
928
- rb_define_method(cTVOffsetInfo, "hash", frt_tvoi_hash, 0);
929
- rb_define_method(cTVOffsetInfo, "to_s", frt_tvoi_to_s, 0);
930
-
931
- /* TermVector */
932
- cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
933
- rb_define_alloc_func(cTermVector, frt_data_alloc);
934
- rb_define_method(cTermVector, "field", frt_tv_get_field, 0);
935
- rb_define_method(cTermVector, "terms", frt_tv_get_terms, 0);
936
- rb_define_method(cTermVector, "freqs", frt_tv_get_freqs, 0);
937
- rb_define_method(cTermVector, "positions", frt_tv_get_positions, 0);
938
- rb_define_method(cTermVector, "offsets", frt_tv_get_offsets, 0);
939
-
940
- /* TermDocEnum */
941
- cTermDocEnum = rb_define_class_under(mIndex, "TermDocEnum", rb_cObject);
942
- rb_define_alloc_func(cTermDocEnum, frt_data_alloc);
943
- rb_define_method(cTermDocEnum, "close", frt_tde_close, 0);
944
- rb_define_method(cTermDocEnum, "seek", frt_tde_seek, 1);
945
- rb_define_method(cTermDocEnum, "doc", frt_tde_doc, 0);
946
- rb_define_method(cTermDocEnum, "freq", frt_tde_freq, 0);
947
- rb_define_method(cTermDocEnum, "next?", frt_tde_next, 0);
948
- rb_define_method(cTermDocEnum, "next_position", frt_tde_next_position, 0);
949
- rb_define_method(cTermDocEnum, "read", frt_tde_read, 2);
950
- rb_define_method(cTermDocEnum, "skip_to", frt_tde_skip_to, 1);
951
-
952
- /* IndexWriter */
953
- cIndexWriter = rb_define_class_under(mIndex, "IndexWriter", rb_cObject);
954
- rb_define_alloc_func(cIndexWriter, frt_data_alloc);
955
-
956
- rb_define_const(cIndexWriter, "WRITE_LOCK_TIMEOUT", INT2FIX(1));
957
- rb_define_const(cIndexWriter, "COMMIT_LOCK_TIMEOUT", INT2FIX(10));
958
- rb_define_const(cIndexWriter, "WRITE_LOCK_NAME",
959
- rb_str_new2(WRITE_LOCK_NAME));
960
- rb_define_const(cIndexWriter, "COMMIT_LOCK_NAME",
961
- rb_str_new2(COMMIT_LOCK_NAME));
962
- rb_define_const(cIndexWriter, "DEFAULT_MERGE_FACTOR",
963
- INT2FIX(config.merge_factor));
964
- rb_define_const(cIndexWriter, "DEFAULT_MIN_MERGE_DOCS",
965
- INT2FIX(config.min_merge_docs));
966
- rb_define_const(cIndexWriter, "DEFAULT_MAX_MERGE_DOCS",
967
- INT2FIX(config.max_merge_docs));
968
- rb_define_const(cIndexWriter, "DEFAULT_MAX_FIELD_LENGTH",
969
- INT2FIX(config.max_field_length));
970
- rb_define_const(cIndexWriter, "DEFAULT_TERM_INDEX_INTERVAL",
971
- INT2FIX(config.term_index_interval));
972
-
973
- rb_define_method(cIndexWriter, "initialize", frt_iw_init, -1);
974
- rb_define_method(cIndexWriter, "close", frt_iw_close, 0);
975
- rb_define_method(cIndexWriter, "add_document", frt_iw_add_doc, 1);
976
- rb_define_method(cIndexWriter, "<<", frt_iw_add_doc, 1);
977
- rb_define_method(cIndexWriter, "merge_factor", frt_iw_get_merge_factor, 0);
978
- rb_define_method(cIndexWriter, "min_merge_docs", frt_iw_get_min_merge_docs, 0);
979
- rb_define_method(cIndexWriter, "max_merge_docs", frt_iw_get_max_merge_docs, 0);
980
- rb_define_method(cIndexWriter, "max_field_length", frt_iw_get_max_field_length, 0);
981
- rb_define_method(cIndexWriter, "term_index_interval", frt_iw_get_term_index_interval, 0);
982
- rb_define_method(cIndexWriter, "use_compound_file", frt_iw_get_use_compound_file, 0);
983
- rb_define_method(cIndexWriter, "doc_count", frt_iw_get_doc_count, 0);
984
- rb_define_method(cIndexWriter, "merge_factor=", frt_iw_set_merge_factor, 1);
985
- rb_define_method(cIndexWriter, "min_merge_docs=", frt_iw_set_min_merge_docs, 1);
986
- rb_define_method(cIndexWriter, "max_merge_docs=", frt_iw_set_max_merge_docs, 1);
987
- rb_define_method(cIndexWriter, "max_field_length=", frt_iw_set_max_field_length, 1);
988
- rb_define_method(cIndexWriter, "term_index_interval=", frt_iw_set_term_index_interval, 1);
989
- rb_define_method(cIndexWriter, "use_compound_file=", frt_iw_set_use_compound_file, 1);
990
- rb_define_method(cIndexWriter, "optimize", frt_iw_optimize, 0);
991
-
992
- /* IndexReader */
993
- cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
994
- rb_define_alloc_func(cIndexReader, frt_data_alloc);
995
- rb_define_singleton_method(cIndexReader, "open", frt_ir_open, -1);
996
- rb_define_method(cIndexReader, "initialize", frt_ir_init, -1);
997
- rb_define_method(cIndexReader, "set_norm", frt_ir_set_norm, 3);
998
- rb_define_method(cIndexReader, "get_norms", frt_ir_get_norms, 1);
999
- rb_define_method(cIndexReader, "get_norms_into", frt_ir_get_norms_into, 3);
1000
- rb_define_method(cIndexReader, "commit", frt_ir_commit, 0);
1001
- rb_define_method(cIndexReader, "close", frt_ir_close, 0);
1002
- rb_define_method(cIndexReader, "has_deletions?", frt_ir_has_deletions, 0);
1003
- rb_define_method(cIndexReader, "delete", frt_ir_delete, 1);
1004
- rb_define_method(cIndexReader, "deleted?", frt_ir_is_deleted, 1);
1005
- rb_define_method(cIndexReader, "max_doc", frt_ir_max_doc, 0);
1006
- rb_define_method(cIndexReader, "num_docs", frt_ir_num_docs, 0);
1007
- rb_define_method(cIndexReader, "undelete_all", frt_ir_undelete_all, 0);
1008
- rb_define_method(cIndexReader, "latest?", frt_ir_is_latest, 0);
1009
- rb_define_method(cIndexReader, "get_document", frt_ir_get_doc, 1);
1010
- rb_define_method(cIndexReader, "[]", frt_ir_get_doc, 1);
1011
- rb_define_method(cIndexReader, "get_term_vector", frt_ir_get_term_vector, 2);
1012
- rb_define_method(cIndexReader, "get_term_vectors", frt_ir_get_term_vectors, 1);
1013
- rb_define_method(cIndexReader, "term_docs", frt_ir_term_docs, 0);
1014
- rb_define_method(cIndexReader, "term_positions", frt_ir_term_positions, 0);
1015
- rb_define_method(cIndexReader, "term_docs_for", frt_ir_term_docs_for, 1);
1016
- rb_define_method(cIndexReader, "term_positions_for", frt_ir_term_positions_for, 1);
1017
- rb_define_method(cIndexReader, "doc_freq", frt_ir_doc_freq, 1);
1018
- rb_define_method(cIndexReader, "terms", frt_ir_terms, 0);
1019
- rb_define_method(cIndexReader, "terms_from", frt_ir_terms_from, 1);
1020
- rb_define_method(cIndexReader, "get_field_names", frt_ir_get_field_names, 0);
1021
- }