ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/r_index_io.c DELETED
@@ -1,1021 +0,0 @@
1
- #include "ferret.h"
2
- #include "index.h"
3
-
4
- VALUE cTVOffsetInfo;
5
- VALUE cTermVector;
6
- VALUE cTermDocEnum;
7
- VALUE cIndexWriter;
8
- VALUE cIndexReader;
9
- VALUE cTermEnum;
10
-
11
- VALUE ranalyzer_key;
12
- VALUE rclose_dir_key;
13
- VALUE rcreate_key;
14
- VALUE rcreate_if_missing_key;
15
- VALUE ruse_compound_file_key;
16
- VALUE rmerge_factor_key;
17
- VALUE rmin_merge_docs_key;
18
- VALUE rmax_merge_docs_key;
19
- VALUE rmax_field_length_key;
20
- VALUE rterm_index_interval_key;
21
-
22
- extern void frt_set_term(VALUE rterm, Term *t);
23
- extern VALUE frt_get_rterm(char *field, char *text);
24
- extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
25
-
26
- /****************************************************************************
27
- *
28
- * TermEnum Methods
29
- *
30
- ****************************************************************************/
31
-
32
- static void
33
- frt_te_free(void *p)
34
- {
35
- TermEnum *te = (TermEnum *)p;
36
- te->close(te);
37
- }
38
-
39
- #define GET_TE TermEnum *te = (TermEnum *)DATA_PTR(self)
40
- static VALUE
41
- frt_te_next(VALUE self)
42
- {
43
- GET_TE;
44
- return te->next(te) ? Qtrue : Qfalse;
45
- }
46
-
47
- static VALUE
48
- frt_te_term(VALUE self)
49
- {
50
- GET_TE;
51
- if (!te->tb_curr) return Qnil;
52
- return frt_get_rterm(te->tb_curr->field, te->tb_curr->text);
53
- }
54
-
55
- static VALUE
56
- frt_te_doc_freq(VALUE self)
57
- {
58
- GET_TE;
59
- if (!te->tb_curr) return Qnil;
60
- return INT2FIX(te->ti_curr->doc_freq);
61
- }
62
-
63
- static VALUE
64
- frt_te_close(VALUE self)
65
- {
66
- GET_TE;
67
- Frt_Unwrap_Struct(self);
68
- te->close(te);
69
- return Qnil;
70
- }
71
-
72
- static VALUE
73
- frt_te_skip_to(VALUE self, VALUE rterm)
74
- {
75
- GET_TE;
76
- Term t;
77
- frt_set_term(rterm, &t);
78
-
79
- return te_skip_to(te, &t) ? Qtrue : Qfalse;
80
- }
81
-
82
- /****************************************************************************
83
- *
84
- * TermVectorOffsetInfo Methods
85
- *
86
- ****************************************************************************/
87
-
88
- void
89
- frt_tvoi_free(void *p)
90
- {
91
- object_del(p);
92
- tvoi_destroy(p);
93
- }
94
-
95
- static VALUE
96
- frt_tvoi_init(VALUE self, VALUE rstart, VALUE rend)
97
- {
98
- TVOffsetInfo *tvoi = tvoi_create(FIX2INT(rstart), FIX2INT(rend));
99
- Frt_Wrap_Struct(self, NULL, &frt_tvoi_free, tvoi);
100
- object_add(tvoi, self);
101
- return self;
102
- }
103
-
104
- #define GET_TVOI TVOffsetInfo *tvoi = (TVOffsetInfo *)DATA_PTR(self)
105
-
106
- static VALUE
107
- frt_tvoi_set_start(VALUE self, VALUE rstart)
108
- {
109
- GET_TVOI;
110
- tvoi->start = FIX2INT(rstart);
111
- return Qnil;
112
- }
113
-
114
- static VALUE
115
- frt_tvoi_get_start(VALUE self)
116
- {
117
- GET_TVOI;
118
- return INT2FIX(tvoi->start);
119
- }
120
-
121
- static VALUE
122
- frt_tvoi_set_end(VALUE self, VALUE rend)
123
- {
124
- GET_TVOI;
125
- tvoi->end = FIX2INT(rend);
126
- return Qnil;
127
- }
128
-
129
- static VALUE
130
- frt_tvoi_get_end(VALUE self)
131
- {
132
- GET_TVOI;
133
- return INT2FIX(tvoi->end);
134
- }
135
-
136
- static VALUE
137
- frt_tvoi_eql(VALUE self, VALUE rother)
138
- {
139
- GET_TVOI;
140
- TVOffsetInfo *other;
141
- if (TYPE(rother) != T_DATA) return Qfalse;
142
- Data_Get_Struct(rother, TVOffsetInfo, other);
143
-
144
- return ((tvoi->start == other->start) && (tvoi->end == other->end))
145
- ? Qtrue : Qfalse;
146
- }
147
-
148
- static VALUE
149
- frt_tvoi_hash(VALUE self, VALUE rother)
150
- {
151
- GET_TVOI;
152
- return INT2FIX(29 * tvoi->start + tvoi->end);
153
- }
154
-
155
- static VALUE
156
- frt_tvoi_to_s(VALUE self)
157
- {
158
- char buf[60];
159
- GET_TVOI;
160
- sprintf(buf, "TermVectorOffsetInfo(%d:%d)", tvoi->start, tvoi->end);
161
- return rb_str_new2(buf);
162
- }
163
-
164
- /****************************************************************************
165
- *
166
- * TermVector Methods
167
- *
168
- ****************************************************************************/
169
-
170
- void
171
- frt_tv_free(void *p)
172
- {
173
- int i;
174
- TermVector *tv = (TermVector *)p;
175
- for (i = 0; i < tv->tcnt; i++) {
176
- free(tv->terms[i]);
177
- }
178
- free(tv->terms);
179
- if (tv->positions) {
180
- for (i = 0; i < tv->tcnt; i++) {
181
- free(tv->positions[i]);
182
- }
183
- free(tv->positions);
184
- }
185
- if (tv->offsets) {
186
- for (i = 0; i < tv->tcnt; i++) {
187
- free(tv->offsets[i]);
188
- }
189
- free(tv->offsets);
190
- }
191
- free(tv->freqs);
192
- object_del(p);
193
- free(p);
194
- }
195
-
196
- void
197
- frt_tv_mark(void *p)
198
- {
199
- int i, j;
200
- TermVector *tv = (TermVector *)p;
201
- if (tv->offsets != NULL) {
202
- for (i = 0; i < tv->tcnt; i++) {
203
- for (j = 0; j < tv->freqs[i]; j++) {
204
- frt_gc_mark(tv->offsets[i][j]);
205
- }
206
- }
207
- }
208
- }
209
-
210
- static VALUE
211
- frt_get_tv(TermVector *tv)
212
- {
213
- VALUE self = Qnil;
214
- if (tv) {
215
- self = object_get(tv);
216
- if (self == Qnil) {
217
- self = Data_Wrap_Struct(cTermVector, &frt_tv_mark, &frt_tv_free, tv);
218
- if (tv->offsets) {
219
- TVOffsetInfo *tvoi;
220
- VALUE rtvoi;
221
- int i, j;
222
- for (i = 0; i < tv->tcnt; i++) {
223
- for (j = 0; j < tv->freqs[i]; j++) {
224
- tvoi = tv->offsets[i][j];
225
- if (object_get(tvoi) == Qnil) {
226
- rtvoi = Data_Wrap_Struct(cTVOffsetInfo, NULL, &frt_tvoi_free, tvoi);
227
- object_add(tvoi, rtvoi);
228
- }
229
- }
230
- }
231
- }
232
- object_add(tv, self);
233
- }
234
- }
235
- return self;
236
- }
237
-
238
- #define GET_TV TermVector *tv = (TermVector *)DATA_PTR(self)
239
-
240
- static VALUE
241
- frt_tv_get_field(VALUE self)
242
- {
243
- GET_TV;
244
- return rb_str_new2(tv->field);
245
- }
246
-
247
- static VALUE
248
- frt_tv_get_terms(VALUE self)
249
- {
250
- int i;
251
- GET_TV;
252
- VALUE rterms = rb_ary_new2(tv->tcnt);
253
- for (i = 0; i < tv->tcnt; i++) {
254
- rb_ary_push(rterms, rb_str_new2(tv->terms[i]));
255
- }
256
- return rterms;
257
- }
258
-
259
- static VALUE
260
- frt_tv_get_freqs(VALUE self)
261
- {
262
- int i;
263
- GET_TV;
264
- VALUE rfreqs = rb_ary_new2(tv->tcnt);
265
- for (i = 0; i < tv->tcnt; i++) {
266
- rb_ary_push(rfreqs, INT2FIX(tv->freqs[i]));
267
- }
268
- return rfreqs;
269
- }
270
-
271
- static VALUE
272
- frt_tv_get_positions(VALUE self)
273
- {
274
- int i, j, freq;
275
- GET_TV;
276
- VALUE rpositions, rpositionss;
277
-
278
- if (!tv->positions) return Qnil;
279
- rpositionss = rb_ary_new2(tv->tcnt);
280
- for (i = 0; i < tv->tcnt; i++) {
281
- freq = tv->freqs[i];
282
- rpositions = rb_ary_new2(freq);
283
- for (j = 0; j < freq; j++) {
284
- rb_ary_push(rpositions, INT2FIX(tv->positions[i][j]));
285
- }
286
- rb_ary_push(rpositionss, rpositions);
287
- }
288
- return rpositionss;
289
- }
290
-
291
- static VALUE
292
- frt_tv_get_offsets(VALUE self)
293
- {
294
- int i, j, freq;
295
- GET_TV;
296
- VALUE roffsetss, roffsets, roffset;
297
- if (!tv->offsets) return Qnil;
298
- roffsetss = rb_ary_new2(tv->tcnt);
299
-
300
- for (i = 0; i < tv->tcnt; i++) {
301
- freq = tv->freqs[i];
302
- roffsets = rb_ary_new2(freq);
303
- for (j = 0; j < freq; j++) {
304
- roffset = object_get(tv->offsets[i][j]);
305
- rb_ary_push(roffsets, roffset);
306
- }
307
- rb_ary_push(roffsetss, roffsets);
308
- }
309
- return roffsetss;
310
- }
311
-
312
- /****************************************************************************
313
- *
314
- * TermDocEnum Methods
315
- *
316
- ****************************************************************************/
317
-
318
- void
319
- frt_tde_free(void *p)
320
- {
321
- TermDocEnum *tde = (TermDocEnum *)p;
322
- tde->close(tde);
323
- }
324
-
325
- static VALUE
326
- frt_get_tde(TermDocEnum *tde)
327
- {
328
- return Data_Wrap_Struct(cTermDocEnum, NULL, &frt_tde_free, tde);
329
- }
330
-
331
- #define GET_TDE TermDocEnum *tde = (TermDocEnum *)DATA_PTR(self)
332
-
333
- static VALUE
334
- frt_tde_close(VALUE self)
335
- {
336
- GET_TDE;
337
- Frt_Unwrap_Struct(self);
338
- tde->close(tde);
339
- return Qnil;
340
- }
341
-
342
- static VALUE
343
- frt_tde_seek(VALUE self, VALUE rterm)
344
- {
345
- GET_TDE;
346
- Term t;
347
- frt_set_term(rterm, &t);
348
- tde->seek(tde, &t);
349
- return Qnil;
350
- }
351
-
352
- static VALUE
353
- frt_tde_doc(VALUE self)
354
- {
355
- GET_TDE;
356
- return INT2FIX(tde->doc_num(tde));
357
- }
358
-
359
- static VALUE
360
- frt_tde_freq(VALUE self)
361
- {
362
- GET_TDE;
363
- return INT2FIX(tde->freq(tde));
364
- }
365
-
366
- static VALUE
367
- frt_tde_next(VALUE self)
368
- {
369
- GET_TDE;
370
- return tde->next(tde) ? Qtrue : Qfalse;
371
- }
372
-
373
- static VALUE
374
- frt_tde_next_position(VALUE self)
375
- {
376
- GET_TDE;
377
- return INT2FIX(tde->next_position(tde));
378
- }
379
-
380
- static VALUE
381
- frt_tde_read(VALUE self, VALUE rdocs, VALUE rfreqs)
382
- {
383
- int i, req_num, cnt;
384
- GET_TDE;
385
- Check_Type(rdocs, T_ARRAY);
386
- Check_Type(rfreqs, T_ARRAY);
387
- req_num = MIN(RARRAY(rdocs)->len, RARRAY(rfreqs)->len);
388
- cnt = tde->read(tde, (int *)RARRAY(rdocs)->ptr,
389
- (int *)RARRAY(rfreqs)->ptr, req_num);
390
- for (i = 0; i < cnt; i++) {
391
- RARRAY(rdocs)->ptr[i] = INT2FIX(RARRAY(rdocs)->ptr[i]);
392
- RARRAY(rfreqs)->ptr[i] = INT2FIX(RARRAY(rfreqs)->ptr[i]);
393
- }
394
- return INT2FIX(cnt);
395
- }
396
-
397
- static VALUE
398
- frt_tde_skip_to(VALUE self, VALUE rtarget)
399
- {
400
- GET_TDE;
401
- return tde->skip_to(tde, FIX2INT(rtarget)) ? Qtrue : Qfalse;
402
- }
403
-
404
- /****************************************************************************
405
- *
406
- * IndexWriter Methods
407
- *
408
- ****************************************************************************/
409
-
410
- void
411
- frt_iw_free(void *p)
412
- {
413
- IndexWriter *iw = (IndexWriter *)p;
414
- iw_close(iw);
415
- }
416
-
417
- void
418
- frt_iw_mark(void *p)
419
- {
420
- IndexWriter *iw = (IndexWriter *)p;
421
- frt_gc_mark(iw->analyzer);
422
- frt_gc_mark(iw->store);
423
- }
424
-
425
- #define SET_INT_ATTR(attr) \
426
- if (RTEST(rval = rb_hash_aref(roptions, r##attr##_key)))\
427
- iw->attr = FIX2INT(rval);
428
-
429
- static VALUE
430
- frt_iw_init(int argc, VALUE *argv, VALUE self)
431
- {
432
- VALUE rdir, roptions, rval;
433
- bool create = false;
434
- bool use_compound_file = true;
435
- Store *store;
436
- Analyzer *analyzer = NULL;
437
- IndexWriter *iw;
438
- rb_scan_args(argc, argv, "02", &rdir, &roptions);
439
- if (argc > 0) {
440
- if (TYPE(rdir) == T_DATA) {
441
- store = DATA_PTR(rdir);
442
- ref(store);
443
- } else {
444
- StringValue(rdir);
445
- frt_create_dir(rdir);
446
- store = open_fs_store(RSTRING(rdir)->ptr);
447
- }
448
- } else {
449
- store = open_ram_store();
450
- }
451
- if (argc == 2) {
452
- Check_Type(roptions, T_HASH);
453
- /* Let ruby's GC handle the closing of the store
454
- if (!close_dir) {
455
- close_dir = RTEST(rb_hash_aref(roptions, rclose_dir_key));
456
- }
457
- */
458
- /* use_compound_file defaults to true */
459
- use_compound_file =
460
- (rb_hash_aref(roptions, ruse_compound_file_key) == Qfalse) ? false : true;
461
-
462
- rval = rb_hash_aref(roptions, ranalyzer_key);
463
- if (rval == Qnil) {
464
- analyzer = mb_standard_analyzer_create(true);
465
- } else {
466
- analyzer = frt_get_cwrapped_analyzer(rval);
467
- }
468
- create = RTEST(rb_hash_aref(roptions, rcreate_key));
469
- if (!create && RTEST(rb_hash_aref(roptions, rcreate_if_missing_key))) {
470
- if (!store->exists(store, "segments")) {
471
- create = true;
472
- }
473
- }
474
- }
475
- iw = iw_open(store, analyzer, create);
476
- store_deref(store);
477
- iw->use_compound_file = use_compound_file;
478
-
479
- SET_INT_ATTR(merge_factor);
480
- SET_INT_ATTR(min_merge_docs);
481
- SET_INT_ATTR(max_merge_docs);
482
- SET_INT_ATTR(max_field_length);
483
- SET_INT_ATTR(term_index_interval);
484
-
485
- Frt_Wrap_Struct(self, &frt_iw_mark, &frt_iw_free, iw);
486
- return self;
487
- }
488
-
489
- #define GET_IW IndexWriter *iw = (IndexWriter *)DATA_PTR(self)
490
-
491
- static VALUE
492
- frt_iw_close(VALUE self)
493
- {
494
- GET_IW;
495
- Frt_Unwrap_Struct(self);
496
- iw_close(iw);
497
- return Qnil;
498
- }
499
-
500
- static VALUE
501
- frt_iw_add_doc(VALUE self, VALUE rdoc)
502
- {
503
- GET_IW;
504
- Document *doc;
505
- Data_Get_Struct(rdoc, Document, doc);
506
- iw_add_doc(iw, doc);
507
- return Qnil;
508
- }
509
-
510
- static VALUE
511
- frt_iw_set_merge_factor(VALUE self, VALUE val)
512
- {
513
- GET_IW;
514
- iw->merge_factor = FIX2INT(val);
515
- return Qnil;
516
- }
517
-
518
- static VALUE
519
- frt_iw_set_min_merge_docs(VALUE self, VALUE val)
520
- {
521
- GET_IW;
522
- iw->min_merge_docs = FIX2INT(val);
523
- return Qnil;
524
- }
525
-
526
- static VALUE
527
- frt_iw_set_max_merge_docs(VALUE self, VALUE val)
528
- {
529
- GET_IW;
530
- iw->max_merge_docs = FIX2INT(val);
531
- return Qnil;
532
- }
533
-
534
- static VALUE
535
- frt_iw_set_max_field_length(VALUE self, VALUE val)
536
- {
537
- GET_IW;
538
- iw->max_field_length = FIX2INT(val);
539
- return Qnil;
540
- }
541
-
542
- static VALUE
543
- frt_iw_set_term_index_interval(VALUE self, VALUE val)
544
- {
545
- GET_IW;
546
- iw->term_index_interval = FIX2INT(val);
547
- return Qnil;
548
- }
549
-
550
- static VALUE
551
- frt_iw_set_use_compound_file(VALUE self, VALUE val)
552
- {
553
- GET_IW;
554
- iw->use_compound_file = FIX2INT(val);
555
- return Qnil;
556
- }
557
-
558
- static VALUE
559
- frt_iw_get_doc_count(VALUE self)
560
- {
561
- GET_IW;
562
- return INT2FIX(iw_doc_count(iw));
563
- }
564
-
565
- static VALUE
566
- frt_iw_get_merge_factor(VALUE self)
567
- {
568
- GET_IW;
569
- return INT2FIX(iw->merge_factor);
570
- }
571
-
572
- static VALUE
573
- frt_iw_get_min_merge_docs(VALUE self)
574
- {
575
- GET_IW;
576
- return INT2FIX(iw->min_merge_docs);
577
- }
578
-
579
- static VALUE
580
- frt_iw_get_max_merge_docs(VALUE self)
581
- {
582
- GET_IW;
583
- return INT2FIX(iw->max_merge_docs);
584
- }
585
-
586
- static VALUE
587
- frt_iw_get_max_field_length(VALUE self)
588
- {
589
- GET_IW;
590
- return INT2FIX(iw->max_field_length);
591
- }
592
-
593
- static VALUE
594
- frt_iw_get_term_index_interval(VALUE self)
595
- {
596
- GET_IW;
597
- return INT2FIX(iw->term_index_interval);
598
- }
599
-
600
- static VALUE
601
- frt_iw_get_use_compound_file(VALUE self)
602
- {
603
- GET_IW;
604
- return INT2FIX(iw->use_compound_file);
605
- }
606
-
607
- static VALUE
608
- frt_iw_optimize(VALUE self)
609
- {
610
- GET_IW;
611
- iw_optimize(iw);
612
- return Qnil;
613
- }
614
-
615
- /****************************************************************************
616
- *
617
- * IndexReader Methods
618
- *
619
- ****************************************************************************/
620
-
621
- void
622
- frt_ir_free(void *p)
623
- {
624
- object_del(p);
625
- ir_close((IndexReader *)p);
626
- }
627
-
628
- void
629
- frt_ir_mark(void *p)
630
- {
631
- IndexReader *ir = (IndexReader *)p;
632
- frt_gc_mark(ir->store);
633
- }
634
-
635
- static VALUE
636
- frt_ir_init(int argc, VALUE *argv, VALUE self)
637
- {
638
- VALUE rdir, rclose_dir;
639
- //bool close_dir = false;
640
- Store *store = NULL;
641
- IndexReader *ir;
642
- switch (rb_scan_args(argc, argv, "11", &rdir, &rclose_dir)) {
643
- case 2: //close_dir = RTEST(rclose_dir);
644
- case 1:
645
- if (TYPE(rdir) == T_DATA) {
646
- store = DATA_PTR(rdir);
647
- } else {
648
- rdir = rb_obj_as_string(rdir);
649
- frt_create_dir(rdir);
650
- store = open_fs_store(RSTRING(rdir)->ptr);
651
- deref(store);
652
- }
653
- }
654
- ir = ir_open(store);
655
- Frt_Wrap_Struct(self, &frt_ir_mark, &frt_ir_free, ir);
656
- object_add(ir, self);
657
- return self;
658
- }
659
-
660
- static VALUE
661
- frt_ir_open(int argc, VALUE *argv, VALUE klass)
662
- {
663
- VALUE self = Frt_Make_Struct(klass);
664
- return frt_ir_init(argc, argv, self);
665
- }
666
-
667
- #define GET_IR IndexReader *ir = (IndexReader *)DATA_PTR(self)
668
-
669
- static VALUE
670
- frt_ir_set_norm(VALUE self, VALUE rdoc_num, VALUE rfield, VALUE rval)
671
- {
672
- GET_IR;
673
- rfield = rb_obj_as_string(rfield);
674
- ir_set_norm(ir, FIX2INT(rdoc_num), RSTRING(rfield)->ptr, NUM2CHR(rval));
675
- return Qnil;
676
- }
677
-
678
- static VALUE
679
- frt_ir_get_norms(VALUE self, VALUE rfield)
680
- {
681
- GET_IR;
682
- uchar *norms;
683
- rfield = rb_obj_as_string(rfield);
684
- norms = ir->get_norms(ir, RSTRING(rfield)->ptr);
685
- if (norms) {
686
- return rb_str_new((char *)norms, ir->max_doc(ir));
687
- } else {
688
- return Qnil;
689
- }
690
- }
691
-
692
- static VALUE
693
- frt_ir_get_norms_into(VALUE self, VALUE rfield, VALUE rnorms, VALUE roffset)
694
- {
695
- GET_IR;
696
- int offset;
697
- rfield = rb_obj_as_string(rfield);
698
- offset = FIX2INT(roffset);
699
- Check_Type(rnorms, T_STRING);
700
- if (RSTRING(rnorms)->len < offset + ir->max_doc(ir)) {
701
- rb_raise(rb_eArgError, "supplied a string of length:%d to IndexReader#get_norms_into but needed a string of length offset:%d + maxdoc:%d", RSTRING(rnorms)->len, offset, ir->max_doc(ir));
702
- }
703
-
704
- ir->get_norms_into(ir, RSTRING(rfield)->ptr, (uchar *)RSTRING(rnorms)->ptr, offset);
705
- return Qnil;
706
- }
707
-
708
- static VALUE
709
- frt_ir_commit(VALUE self)
710
- {
711
- GET_IR;
712
- ir_commit(ir);
713
- return Qnil;
714
- }
715
-
716
- static VALUE
717
- frt_ir_close(VALUE self)
718
- {
719
- GET_IR;
720
- object_del(ir);
721
- Frt_Unwrap_Struct(self);
722
- ir_close(ir);
723
- return Qnil;
724
- }
725
-
726
- static VALUE
727
- frt_ir_has_deletions(VALUE self)
728
- {
729
- GET_IR;
730
- return ir->has_deletions(ir) ? Qtrue : Qfalse;
731
- }
732
-
733
- static VALUE
734
- frt_ir_delete(VALUE self, VALUE rdoc_num)
735
- {
736
- GET_IR;
737
- int doc_num = FIX2INT(rdoc_num);
738
- ir_delete_doc(ir, doc_num);
739
- return Qnil;
740
- }
741
-
742
- static VALUE
743
- frt_ir_is_deleted(VALUE self, VALUE rdoc_num)
744
- {
745
- GET_IR;
746
- int doc_num = FIX2INT(rdoc_num);
747
- return ir->is_deleted(ir, doc_num) ? Qtrue : Qfalse;
748
- }
749
-
750
- static VALUE
751
- frt_ir_max_doc(VALUE self)
752
- {
753
- GET_IR;
754
- return INT2FIX(ir->max_doc(ir));
755
- }
756
-
757
- static VALUE
758
- frt_ir_num_docs(VALUE self)
759
- {
760
- GET_IR;
761
- return INT2FIX(ir->num_docs(ir));
762
- }
763
-
764
- static VALUE
765
- frt_ir_undelete_all(VALUE self)
766
- {
767
- GET_IR;
768
- ir_undelete_all(ir);
769
- return Qnil;
770
- }
771
-
772
- static VALUE
773
- frt_ir_get_doc(VALUE self, VALUE rdoc_num)
774
- {
775
- GET_IR;
776
- Document *doc = ir->get_doc(ir, FIX2INT(rdoc_num));
777
- return frt_get_doc(doc);
778
- }
779
-
780
- static VALUE
781
- frt_ir_is_latest(VALUE self)
782
- {
783
- GET_IR;
784
- return ir_is_latest(ir) ? Qtrue : Qfalse;
785
- }
786
-
787
- static VALUE
788
- frt_ir_get_term_vector(VALUE self, VALUE rdoc_num, VALUE rfield)
789
- {
790
- GET_IR;
791
- TermVector *tv;
792
- rfield = rb_obj_as_string(rfield);
793
- tv = ir->get_term_vector(ir, FIX2INT(rdoc_num), RSTRING(rfield)->ptr);
794
- return frt_get_tv(tv);
795
- }
796
-
797
- static VALUE
798
- frt_ir_get_term_vectors(VALUE self, VALUE rdoc_num)
799
- {
800
- int i;
801
- GET_IR;
802
- Array *tvs = ir->get_term_vectors(ir, FIX2INT(rdoc_num));
803
- VALUE rtvs = rb_ary_new2(tvs->size);
804
- VALUE rtv;
805
- for (i = 0; i < tvs->size; i++) {
806
- rtv = frt_get_tv(tvs->elems[i]);
807
- rb_ary_push(rtvs, rtv);
808
- }
809
- tvs->free_elem = NULL;
810
- ary_destroy(tvs);
811
-
812
- return rtvs;
813
- }
814
-
815
- static VALUE
816
- frt_ir_term_docs(VALUE self)
817
- {
818
- GET_IR;
819
- return frt_get_tde(ir->term_docs(ir));
820
- }
821
-
822
- static VALUE
823
- frt_ir_term_docs_for(VALUE self, VALUE rterm)
824
- {
825
- GET_IR;
826
- Term t;
827
- frt_set_term(rterm, &t);
828
- return frt_get_tde(ir_term_docs_for(ir, &t));
829
- }
830
-
831
- static VALUE
832
- frt_ir_term_positions(VALUE self)
833
- {
834
- GET_IR;
835
- return frt_get_tde(ir->term_positions(ir));
836
- }
837
-
838
- static VALUE
839
- frt_ir_term_positions_for(VALUE self, VALUE rterm)
840
- {
841
- GET_IR;
842
- Term t;
843
- frt_set_term(rterm, &t);
844
- return frt_get_tde(ir_term_positions_for(ir, &t));
845
- }
846
-
847
- static VALUE
848
- frt_ir_doc_freq(VALUE self, VALUE rterm)
849
- {
850
- GET_IR;
851
- Term t;
852
- frt_set_term(rterm, &t);
853
- return INT2FIX(ir->doc_freq(ir, &t));
854
- }
855
-
856
- static VALUE
857
- frt_ir_terms(VALUE self)
858
- {
859
- TermEnum *te;
860
- GET_IR;
861
- te = ir->terms(ir);
862
- return Data_Wrap_Struct(cTermEnum, NULL, &frt_te_free, te);
863
- }
864
-
865
- static VALUE
866
- frt_ir_terms_from(VALUE self, VALUE rterm)
867
- {
868
- TermEnum *te;
869
- Term t;
870
- GET_IR;
871
- frt_set_term(rterm, &t);
872
- te = ir->terms_from(ir, &t);
873
- return Data_Wrap_Struct(cTermEnum, NULL, &frt_te_free, te);
874
- }
875
-
876
- static VALUE
877
- frt_ir_get_field_names(VALUE self)
878
- {
879
- GET_IR;
880
- VALUE rfnames;
881
- HashSet *fnames = ir->get_field_names(ir, IR_ALL);
882
- rfnames = frt_hs_to_rb_ary(fnames);
883
- hs_destroy(fnames);
884
- return rfnames;
885
- }
886
-
887
- /****************************************************************************
888
- *
889
- * Init Function
890
- *
891
- ****************************************************************************/
892
-
893
- void
894
- Init_index_io(void)
895
- {
896
- ranalyzer_key = ID2SYM(rb_intern("analyzer"));
897
- rclose_dir_key = ID2SYM(rb_intern("close_dir"));
898
- rcreate_key = ID2SYM(rb_intern("create"));
899
- rcreate_if_missing_key = ID2SYM(rb_intern("create_if_missing"));
900
- ruse_compound_file_key = ID2SYM(rb_intern("use_compound_file"));
901
- rmerge_factor_key = ID2SYM(rb_intern("merge_factor"));
902
- rmin_merge_docs_key = ID2SYM(rb_intern("min_merge_docs"));
903
- rmax_merge_docs_key = ID2SYM(rb_intern("max_merge_docs"));
904
- rmax_field_length_key = ID2SYM(rb_intern("max_field_length"));
905
- rterm_index_interval_key = ID2SYM(rb_intern("term_index_interval"));
906
-
907
- /* TermEnum */
908
- cTermEnum = rb_define_class_under(mIndex, "TermEnum", rb_cObject);
909
- rb_define_alloc_func(cTermEnum, frt_data_alloc);
910
-
911
- rb_define_method(cTermEnum, "next?", frt_te_next, 0);
912
- rb_define_method(cTermEnum, "term", frt_te_term, 0);
913
- rb_define_method(cTermEnum, "doc_freq", frt_te_doc_freq, 0);
914
- rb_define_method(cTermEnum, "skip_to", frt_te_skip_to, 1);
915
- rb_define_method(cTermEnum, "close", frt_te_close, 0);
916
-
917
- /* TermVectorOffsetInfo */
918
- cTVOffsetInfo = rb_define_class_under(mIndex, "TermVectorOffsetInfo", rb_cObject);
919
- rb_define_alloc_func(cTVOffsetInfo, frt_data_alloc);
920
-
921
- rb_define_method(cTVOffsetInfo, "initialize", frt_tvoi_init, 2);
922
- rb_define_method(cTVOffsetInfo, "start=", frt_tvoi_set_start, 1);
923
- rb_define_method(cTVOffsetInfo, "start", frt_tvoi_get_start, 0);
924
- rb_define_method(cTVOffsetInfo, "end=", frt_tvoi_set_end, 1);
925
- rb_define_method(cTVOffsetInfo, "end", frt_tvoi_get_end, 0);
926
- rb_define_method(cTVOffsetInfo, "eql?", frt_tvoi_eql, 1);
927
- rb_define_method(cTVOffsetInfo, "==", frt_tvoi_eql, 1);
928
- rb_define_method(cTVOffsetInfo, "hash", frt_tvoi_hash, 0);
929
- rb_define_method(cTVOffsetInfo, "to_s", frt_tvoi_to_s, 0);
930
-
931
- /* TermVector */
932
- cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
933
- rb_define_alloc_func(cTermVector, frt_data_alloc);
934
- rb_define_method(cTermVector, "field", frt_tv_get_field, 0);
935
- rb_define_method(cTermVector, "terms", frt_tv_get_terms, 0);
936
- rb_define_method(cTermVector, "freqs", frt_tv_get_freqs, 0);
937
- rb_define_method(cTermVector, "positions", frt_tv_get_positions, 0);
938
- rb_define_method(cTermVector, "offsets", frt_tv_get_offsets, 0);
939
-
940
- /* TermDocEnum */
941
- cTermDocEnum = rb_define_class_under(mIndex, "TermDocEnum", rb_cObject);
942
- rb_define_alloc_func(cTermDocEnum, frt_data_alloc);
943
- rb_define_method(cTermDocEnum, "close", frt_tde_close, 0);
944
- rb_define_method(cTermDocEnum, "seek", frt_tde_seek, 1);
945
- rb_define_method(cTermDocEnum, "doc", frt_tde_doc, 0);
946
- rb_define_method(cTermDocEnum, "freq", frt_tde_freq, 0);
947
- rb_define_method(cTermDocEnum, "next?", frt_tde_next, 0);
948
- rb_define_method(cTermDocEnum, "next_position", frt_tde_next_position, 0);
949
- rb_define_method(cTermDocEnum, "read", frt_tde_read, 2);
950
- rb_define_method(cTermDocEnum, "skip_to", frt_tde_skip_to, 1);
951
-
952
- /* IndexWriter */
953
- cIndexWriter = rb_define_class_under(mIndex, "IndexWriter", rb_cObject);
954
- rb_define_alloc_func(cIndexWriter, frt_data_alloc);
955
-
956
- rb_define_const(cIndexWriter, "WRITE_LOCK_TIMEOUT", INT2FIX(1));
957
- rb_define_const(cIndexWriter, "COMMIT_LOCK_TIMEOUT", INT2FIX(10));
958
- rb_define_const(cIndexWriter, "WRITE_LOCK_NAME",
959
- rb_str_new2(WRITE_LOCK_NAME));
960
- rb_define_const(cIndexWriter, "COMMIT_LOCK_NAME",
961
- rb_str_new2(COMMIT_LOCK_NAME));
962
- rb_define_const(cIndexWriter, "DEFAULT_MERGE_FACTOR",
963
- INT2FIX(config.merge_factor));
964
- rb_define_const(cIndexWriter, "DEFAULT_MIN_MERGE_DOCS",
965
- INT2FIX(config.min_merge_docs));
966
- rb_define_const(cIndexWriter, "DEFAULT_MAX_MERGE_DOCS",
967
- INT2FIX(config.max_merge_docs));
968
- rb_define_const(cIndexWriter, "DEFAULT_MAX_FIELD_LENGTH",
969
- INT2FIX(config.max_field_length));
970
- rb_define_const(cIndexWriter, "DEFAULT_TERM_INDEX_INTERVAL",
971
- INT2FIX(config.term_index_interval));
972
-
973
- rb_define_method(cIndexWriter, "initialize", frt_iw_init, -1);
974
- rb_define_method(cIndexWriter, "close", frt_iw_close, 0);
975
- rb_define_method(cIndexWriter, "add_document", frt_iw_add_doc, 1);
976
- rb_define_method(cIndexWriter, "<<", frt_iw_add_doc, 1);
977
- rb_define_method(cIndexWriter, "merge_factor", frt_iw_get_merge_factor, 0);
978
- rb_define_method(cIndexWriter, "min_merge_docs", frt_iw_get_min_merge_docs, 0);
979
- rb_define_method(cIndexWriter, "max_merge_docs", frt_iw_get_max_merge_docs, 0);
980
- rb_define_method(cIndexWriter, "max_field_length", frt_iw_get_max_field_length, 0);
981
- rb_define_method(cIndexWriter, "term_index_interval", frt_iw_get_term_index_interval, 0);
982
- rb_define_method(cIndexWriter, "use_compound_file", frt_iw_get_use_compound_file, 0);
983
- rb_define_method(cIndexWriter, "doc_count", frt_iw_get_doc_count, 0);
984
- rb_define_method(cIndexWriter, "merge_factor=", frt_iw_set_merge_factor, 1);
985
- rb_define_method(cIndexWriter, "min_merge_docs=", frt_iw_set_min_merge_docs, 1);
986
- rb_define_method(cIndexWriter, "max_merge_docs=", frt_iw_set_max_merge_docs, 1);
987
- rb_define_method(cIndexWriter, "max_field_length=", frt_iw_set_max_field_length, 1);
988
- rb_define_method(cIndexWriter, "term_index_interval=", frt_iw_set_term_index_interval, 1);
989
- rb_define_method(cIndexWriter, "use_compound_file=", frt_iw_set_use_compound_file, 1);
990
- rb_define_method(cIndexWriter, "optimize", frt_iw_optimize, 0);
991
-
992
- /* IndexReader */
993
- cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
994
- rb_define_alloc_func(cIndexReader, frt_data_alloc);
995
- rb_define_singleton_method(cIndexReader, "open", frt_ir_open, -1);
996
- rb_define_method(cIndexReader, "initialize", frt_ir_init, -1);
997
- rb_define_method(cIndexReader, "set_norm", frt_ir_set_norm, 3);
998
- rb_define_method(cIndexReader, "get_norms", frt_ir_get_norms, 1);
999
- rb_define_method(cIndexReader, "get_norms_into", frt_ir_get_norms_into, 3);
1000
- rb_define_method(cIndexReader, "commit", frt_ir_commit, 0);
1001
- rb_define_method(cIndexReader, "close", frt_ir_close, 0);
1002
- rb_define_method(cIndexReader, "has_deletions?", frt_ir_has_deletions, 0);
1003
- rb_define_method(cIndexReader, "delete", frt_ir_delete, 1);
1004
- rb_define_method(cIndexReader, "deleted?", frt_ir_is_deleted, 1);
1005
- rb_define_method(cIndexReader, "max_doc", frt_ir_max_doc, 0);
1006
- rb_define_method(cIndexReader, "num_docs", frt_ir_num_docs, 0);
1007
- rb_define_method(cIndexReader, "undelete_all", frt_ir_undelete_all, 0);
1008
- rb_define_method(cIndexReader, "latest?", frt_ir_is_latest, 0);
1009
- rb_define_method(cIndexReader, "get_document", frt_ir_get_doc, 1);
1010
- rb_define_method(cIndexReader, "[]", frt_ir_get_doc, 1);
1011
- rb_define_method(cIndexReader, "get_term_vector", frt_ir_get_term_vector, 2);
1012
- rb_define_method(cIndexReader, "get_term_vectors", frt_ir_get_term_vectors, 1);
1013
- rb_define_method(cIndexReader, "term_docs", frt_ir_term_docs, 0);
1014
- rb_define_method(cIndexReader, "term_positions", frt_ir_term_positions, 0);
1015
- rb_define_method(cIndexReader, "term_docs_for", frt_ir_term_docs_for, 1);
1016
- rb_define_method(cIndexReader, "term_positions_for", frt_ir_term_positions_for, 1);
1017
- rb_define_method(cIndexReader, "doc_freq", frt_ir_doc_freq, 1);
1018
- rb_define_method(cIndexReader, "terms", frt_ir_terms, 0);
1019
- rb_define_method(cIndexReader, "terms_from", frt_ir_terms_from, 1);
1020
- rb_define_method(cIndexReader, "get_field_names", frt_ir_get_field_names, 0);
1021
- }