ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/r_doc.c DELETED
@@ -1,582 +0,0 @@
1
- #include "ferret.h"
2
- #include "document.h"
3
-
4
- VALUE cDocument;
5
- VALUE cField;
6
- VALUE cFieldStore;
7
- VALUE cFieldIndex;
8
- VALUE cFieldTermVector;
9
-
10
- /****************************************************************************
11
- *
12
- * Field Methods
13
- *
14
- ****************************************************************************/
15
-
16
- void
17
- frt_field_free(void *p)
18
- {
19
- object_del(p);
20
- df_destroy_data(p);
21
- }
22
-
23
- static VALUE
24
- frt_field_alloc(VALUE klass)
25
- {
26
- VALUE self;
27
- DocField *df = ALLOC(DocField);
28
- df->name = NULL;
29
- df->data = NULL;
30
- self = Data_Wrap_Struct(klass, NULL, &frt_field_free, df);
31
- object_add(df, self);
32
- return self;
33
- }
34
-
35
- #define GET_DF DocField *df = (DocField *)DATA_PTR(self)
36
-
37
- static VALUE
38
- frt_field_init(int argc, VALUE *argv, VALUE self)
39
- {
40
- GET_DF;
41
- VALUE rname, rdata, rstored, rindexed, rstore_tv, rbinary, rboost;
42
- char *name;
43
- char *data;
44
- float boost = 1.0;
45
- int stored = 0, indexed = 0, store_tv = 0;
46
- int len;
47
- bool binary = false;
48
- switch (rb_scan_args(argc, argv, "25", &rname, &rdata, &rstored,
49
- &rindexed, &rstore_tv, &rbinary, &rboost)) {
50
- case 7: boost = (float)rb_num2dbl(rboost);
51
- case 6: binary = RTEST(rbinary);
52
- case 5: store_tv = FIX2INT(rstore_tv);
53
- case 4: indexed = FIX2INT(rindexed);
54
- case 3: stored = FIX2INT(rstored);
55
- default:
56
- rname = rb_obj_as_string(rname);
57
- rdata = rb_obj_as_string(rdata);
58
- break;
59
- }
60
- name = RSTRING(rname)->ptr;
61
- len = RSTRING(rdata)->len;
62
- data = ALLOC_N(char, len + 1);
63
- MEMCPY(data, RSTRING(rdata)->ptr, char, len);
64
- data[len] = 0;
65
- df_set(df, name, data, stored, indexed, store_tv);
66
- df->blen = len;
67
- df->is_binary = binary;
68
- df->boost = boost;
69
- return Qnil;
70
- }
71
-
72
- static VALUE
73
- frt_field_get_name(VALUE self)
74
- {
75
- GET_DF;
76
- return rb_str_new2(df->name);
77
- }
78
-
79
- static VALUE
80
- frt_field_set_name(VALUE self, VALUE rname)
81
- {
82
- int len;
83
- GET_DF;
84
- rname = rb_obj_as_string(rname);
85
- len = RSTRING(rname)->len;
86
- REALLOC_N(df->name, char, len);
87
- MEMCPY(df->name, RSTRING(rname)->ptr, char, len);
88
- return Qnil;
89
- }
90
-
91
- static VALUE
92
- frt_field_get_data(VALUE self)
93
- {
94
- GET_DF;
95
- return rb_str_new(df->data, df->blen);
96
- }
97
-
98
- static VALUE
99
- frt_field_set_data(VALUE self, VALUE rdata)
100
- {
101
- int len;
102
- GET_DF;
103
- rdata = rb_obj_as_string(rdata);
104
- len = RSTRING(rdata)->len;
105
- REALLOC_N(df->data, char, len);
106
- MEMCPY(df->data, RSTRING(rdata)->ptr, char, len);
107
- df->blen = len;
108
- return Qnil;
109
- }
110
-
111
- static VALUE
112
- frt_field_get_boost(VALUE self)
113
- {
114
- GET_DF;
115
- return rb_float_new((double)df->boost);
116
- }
117
-
118
- static VALUE
119
- frt_field_set_boost(VALUE self, VALUE rboost)
120
- {
121
- GET_DF;
122
- df->boost = (float)rb_num2dbl(rboost);
123
- return Qnil;
124
- }
125
-
126
- static VALUE
127
- frt_field_is_stored(VALUE self)
128
- {
129
- GET_DF;
130
- return df->is_stored ? Qtrue : Qfalse;
131
- }
132
-
133
- static VALUE
134
- frt_field_is_indexed(VALUE self)
135
- {
136
- GET_DF;
137
- return df->is_indexed ? Qtrue : Qfalse;
138
- }
139
-
140
- static VALUE
141
- frt_field_is_tokenized(VALUE self)
142
- {
143
- GET_DF;
144
- return df->is_tokenized ? Qtrue : Qfalse;
145
- }
146
-
147
- static VALUE
148
- frt_field_is_binary(VALUE self)
149
- {
150
- GET_DF;
151
- return df->is_binary ? Qtrue : Qfalse;
152
- }
153
-
154
- static VALUE
155
- frt_field_is_compressed(VALUE self)
156
- {
157
- GET_DF;
158
- return df->is_compressed ? Qtrue : Qfalse;
159
- }
160
-
161
- static VALUE
162
- frt_field_store_tv(VALUE self)
163
- {
164
- GET_DF;
165
- return df->store_tv ? Qtrue : Qfalse;
166
- }
167
-
168
- static VALUE
169
- frt_field_store_pos(VALUE self)
170
- {
171
- GET_DF;
172
- return df->store_pos ? Qtrue : Qfalse;
173
- }
174
-
175
- static VALUE
176
- frt_field_store_offset(VALUE self)
177
- {
178
- GET_DF;
179
- return df->store_offset ? Qtrue : Qfalse;
180
- }
181
-
182
- static VALUE
183
- frt_field_omit_norms(VALUE self)
184
- {
185
- GET_DF;
186
- return df->omit_norms ? Qtrue : Qfalse;
187
- }
188
-
189
- static VALUE
190
- frt_field_to_s(VALUE self)
191
- {
192
- VALUE rstr;
193
- char *str;
194
- GET_DF;
195
-
196
- str = df_to_s(df);
197
- rstr = rb_str_new2(str);
198
- free(str);
199
- return rstr;
200
- }
201
-
202
- static VALUE
203
- frt_field_new_binary(VALUE klass, VALUE rname, VALUE rdata, VALUE rstore)
204
- {
205
- char *data;
206
- int len;
207
- DocField *df;
208
- int store = FIX2INT(rstore);
209
- rname = rb_obj_as_string(rname);
210
- rdata = rb_obj_as_string(rdata);
211
- len = RSTRING(rdata)->len;
212
- data = ALLOC_N(char, len);
213
- MEMCPY(data, RSTRING(rdata)->ptr, char, len);
214
-
215
- df = df_create_binary(RSTRING(rname)->ptr, data, len, store);
216
- return Data_Wrap_Struct(klass, NULL, &df_destroy_data, df);
217
- }
218
-
219
- static VALUE
220
- frt_field_set_store(VALUE self, VALUE rstore)
221
- {
222
- GET_DF;
223
- int store = FIX2INT(rstore);
224
- df_set_store(df, store);
225
- return Qnil;
226
- }
227
-
228
- static VALUE
229
- frt_field_set_term_vector(VALUE self, VALUE rterm_vector)
230
- {
231
- GET_DF;
232
- int term_vector = FIX2INT(rterm_vector);
233
- df_set_term_vector(df, term_vector);
234
- return Qnil;
235
- }
236
-
237
- static VALUE
238
- frt_field_set_index(VALUE self, VALUE rindex)
239
- {
240
- GET_DF;
241
- int index = FIX2INT(rindex);
242
- df_set_index(df, index);
243
- return Qnil;
244
- }
245
-
246
- /****************************************************************************
247
- *
248
- * Document Methods
249
- *
250
- ****************************************************************************/
251
-
252
- void
253
- frt_doc_free(void *p)
254
- {
255
- object_del(p);
256
- doc_destroy(p);
257
- }
258
-
259
- void
260
- frt_doc_mark(void *p)
261
- {
262
- int i;
263
- DocField *df;
264
- Document *doc = (Document *)p;
265
- for (i = 0; i < doc->dfcnt; i++) {
266
- df = doc->df_arr[i];
267
- frt_gc_mark(df);
268
- }
269
- }
270
-
271
- static VALUE
272
- frt_doc_alloc(VALUE klass)
273
- {
274
- Document *doc = doc_create();
275
- VALUE self = Data_Wrap_Struct(klass, &frt_doc_mark, &frt_doc_free, doc);
276
- doc->free_data = NULL;
277
- object_add(doc, self);
278
- return self;
279
- }
280
-
281
- VALUE
282
- frt_get_doc(Document *doc)
283
- {
284
- VALUE rfield, self = Qnil;
285
- DocField *df;
286
- int i;
287
-
288
- if (!doc || (self = object_get(doc)) != Qnil) return self;
289
-
290
- doc->free_data = NULL;
291
- self = Data_Wrap_Struct(cDocument, frt_doc_mark, frt_doc_free, doc);
292
-
293
- /* We add all the document's fields to the ruby object space so that they
294
- * can be retrieved in ruby later. This code must come after the above
295
- * wrapper which puts the document in the ruby object space so that there is
296
- * something to mark the doc fields when garbage collection starts. */
297
- for (i = 0; i < doc->dfcnt; i++) {
298
- df = doc->df_arr[i];
299
- rfield = Data_Wrap_Struct(cField, NULL, &frt_field_free, df);
300
- object_add(df, rfield);
301
- }
302
-
303
- object_add(doc, self);
304
- return self;
305
- }
306
-
307
- #define GET_DOC Document *doc = (Document *)DATA_PTR(self)
308
-
309
- static VALUE
310
- frt_doc_init(VALUE self)
311
- {
312
- return self;
313
- }
314
-
315
- static VALUE
316
- frt_doc_all_fields(VALUE self)
317
- {
318
- int i;
319
- GET_DOC;
320
- VALUE values = rb_ary_new2(doc->dfcnt);
321
- for (i = 0; i < doc->dfcnt; i++) {
322
- rb_ary_push(values, object_get(doc->df_arr[i]));
323
- }
324
- return values;
325
- }
326
-
327
- static VALUE
328
- frt_doc_field_count(VALUE self)
329
- {
330
- GET_DOC;
331
- return INT2FIX(doc->fcnt);
332
- }
333
-
334
- static VALUE
335
- frt_doc_entry_count(VALUE self)
336
- {
337
- GET_DOC;
338
- return INT2FIX(doc->dfcnt);
339
- }
340
-
341
- static VALUE
342
- frt_doc_add_field(VALUE self, VALUE rfield)
343
- {
344
- DocField *df;
345
- GET_DOC;
346
- Data_Get_Struct(rfield, DocField, df);
347
- doc_add_field(doc, df);
348
- return Qnil;
349
- }
350
-
351
- /* TODO: return the removed fields as an array */
352
- static VALUE
353
- frt_doc_remove_fields(VALUE self, VALUE rname)
354
- {
355
- Array *fields;
356
- GET_DOC;
357
- rname = rb_obj_as_string(rname);
358
- fields = doc_remove_fields(doc, RSTRING(rname)->ptr);
359
- ary_destroy(fields);
360
- return Qnil;
361
- }
362
-
363
- static VALUE
364
- frt_doc_remove_field(VALUE self, VALUE rname)
365
- {
366
- DocField *df;
367
- GET_DOC;
368
- rname = rb_obj_as_string(rname);
369
- df = doc_remove_field(doc, RSTRING(rname)->ptr);
370
- return object_get(df);
371
- }
372
-
373
- static VALUE
374
- frt_doc_field(VALUE self, VALUE rname)
375
- {
376
- GET_DOC;
377
- DocField *df;
378
- rname = rb_obj_as_string(rname);
379
- df = doc_get_field(doc, RSTRING(rname)->ptr);
380
- return object_get(df);
381
- }
382
-
383
- static VALUE
384
- frt_doc_fields(VALUE self, VALUE rname)
385
- {
386
- int i;
387
- VALUE fields;
388
- GET_DOC;
389
- Array *dfs;
390
- rname = rb_obj_as_string(rname);
391
- dfs = doc_get_fields(doc, RSTRING(rname)->ptr);
392
- if (!dfs) return Qnil;
393
- fields = rb_ary_new2(dfs->size);
394
- for (i = 0; i < dfs->size; i++) {
395
- rb_ary_push(fields, object_get(dfs->elems[i]));
396
- }
397
-
398
- return fields;
399
- }
400
-
401
- static VALUE
402
- frt_doc_values(VALUE self, VALUE rname)
403
- {
404
- int i, len = 0, vindex = 0;
405
- VALUE rvalues;
406
- char *values = NULL;
407
- GET_DOC;
408
- Array *dfs;
409
- DocField *df;
410
- rname = rb_obj_as_string(rname);
411
- dfs = doc_get_fields(doc, RSTRING(rname)->ptr);
412
- if (!dfs) return Qnil;
413
-
414
- for (i = 0; i < dfs->size; i++) {
415
- df = (DocField *)dfs->elems[i];
416
- if (df->is_binary) continue;
417
- len += df->blen + 1;
418
- REALLOC_N(values, char, len);
419
- MEMCPY(values + vindex, df->data, char, df->blen);
420
- vindex = len;
421
- values[vindex-1] = ' ';
422
- }
423
- if (len) {
424
- values[len-1] = '\0';
425
- rvalues = rb_str_new(values, len-1);
426
- free(values);
427
- } else {
428
- rvalues = Qnil;
429
- }
430
-
431
- return rvalues;
432
- }
433
-
434
- static VALUE
435
- frt_doc_binaries(VALUE self, VALUE rname)
436
- {
437
- int i;
438
- VALUE rvalues;
439
- GET_DOC;
440
- Array *dfs;
441
- DocField *df;
442
- rname = rb_obj_as_string(rname);
443
- dfs = doc_get_fields(doc, RSTRING(rname)->ptr);
444
- if (!dfs) return Qnil;
445
-
446
- rvalues = rb_ary_new2(dfs->size);
447
- for (i = 0; i < dfs->size; i++) {
448
- df = (DocField *)dfs->elems[i];
449
- if (!df->is_binary) continue;
450
- rb_ary_push(rvalues, rb_str_new(df->data, df->blen));
451
- }
452
- return rvalues;
453
- }
454
-
455
- static VALUE
456
- frt_doc_set(VALUE self, VALUE rname, VALUE rdata)
457
- {
458
- DocField *df;
459
- GET_DOC;
460
- VALUE rfield;
461
- rname = rb_obj_as_string(rname);
462
- rdata = rb_obj_as_string(rdata);
463
-
464
- df = doc_get_field(doc, RSTRING(rname)->ptr);
465
- if (df) {
466
- free(df->data);
467
- df->data = estrdup(RSTRING(rdata)->ptr);
468
- rfield = object_get(df);
469
- } else {
470
- rfield = rb_funcall(cField, id_new, 2, rname, rdata);
471
- Data_Get_Struct(rfield, DocField, df);
472
- doc_add_field(doc, df);
473
- }
474
- return rfield;
475
- }
476
- static VALUE
477
- frt_doc_to_s(VALUE self)
478
- {
479
- char *str;
480
- VALUE rstr;
481
- GET_DOC;
482
- str = doc_to_s(doc);
483
- rstr = rb_str_new2(str);
484
- free(str);
485
- return rstr;
486
- }
487
-
488
- static VALUE
489
- frt_doc_get_boost(VALUE self)
490
- {
491
- GET_DOC;
492
- return rb_float_new((double)doc->boost);
493
- }
494
-
495
- static VALUE
496
- frt_doc_set_boost(VALUE self, VALUE rboost)
497
- {
498
- GET_DOC;
499
- doc->boost = (float)rb_num2dbl(rboost);
500
- return Qnil;
501
- }
502
-
503
-
504
- /****************************************************************************
505
- *
506
- * Init Function
507
- *
508
- ****************************************************************************/
509
-
510
- void
511
- Init_doc(void)
512
- {
513
- /* Field */
514
- cField = rb_define_class_under(mDocument, "Field", rb_cObject);
515
- rb_define_alloc_func(cField, frt_field_alloc);
516
-
517
- rb_define_method(cField, "initialize", frt_field_init, -1);
518
- rb_define_singleton_method(cField, "new_binary_field",
519
- frt_field_new_binary, 3);
520
- rb_define_method(cField, "name", frt_field_get_name, 0);
521
- rb_define_method(cField, "name=", frt_field_set_name, 1);
522
- rb_define_method(cField, "data", frt_field_get_data, 0);
523
- rb_define_method(cField, "data=", frt_field_set_data, 1);
524
- rb_define_method(cField, "boost", frt_field_get_boost, 0);
525
- rb_define_method(cField, "boost=", frt_field_set_boost, 1);
526
- rb_define_method(cField, "stored?", frt_field_is_stored, 0);
527
- rb_define_method(cField, "indexed?", frt_field_is_indexed, 0);
528
- rb_define_method(cField, "tokenized?", frt_field_is_tokenized, 0);
529
- rb_define_method(cField, "binary?", frt_field_is_binary, 0);
530
- rb_define_method(cField, "compressed?", frt_field_is_compressed, 0);
531
- rb_define_method(cField, "store_term_vector?", frt_field_store_tv, 0);
532
- rb_define_method(cField, "store_positions?", frt_field_store_pos, 0);
533
- rb_define_method(cField, "store_offsets?", frt_field_store_offset, 0);
534
- rb_define_method(cField, "omit_norms?", frt_field_omit_norms, 0);
535
- rb_define_method(cField, "to_s", frt_field_to_s, 0);
536
- rb_define_method(cField, "store=", frt_field_set_store, 1);
537
- rb_define_method(cField, "index=", frt_field_set_index, 1);
538
- rb_define_method(cField, "term_vector=", frt_field_set_term_vector, 1);
539
-
540
- /* Field Constants */
541
- cFieldStore = rb_define_class_under(cField, "Store", rb_cObject);
542
- rb_define_const(cFieldStore, "YES", INT2FIX(DF_STORE_YES));
543
- rb_define_const(cFieldStore, "NO", INT2FIX(DF_STORE_NO));
544
- rb_define_const(cFieldStore, "COMPRESS", INT2FIX(DF_STORE_COMPRESS));
545
- cFieldIndex = rb_define_class_under(cField, "Index", rb_cObject);
546
- rb_define_const(cFieldIndex, "UNTOKENIZED", INT2FIX(DF_INDEX_UNTOKENIZED));
547
- rb_define_const(cFieldIndex, "TOKENIZED", INT2FIX(DF_INDEX_TOKENIZED));
548
- rb_define_const(cFieldIndex, "NO", INT2FIX(DF_INDEX_NO));
549
- rb_define_const(cFieldIndex, "NO_NORMS", INT2FIX(DF_INDEX_NO_NORMS));
550
- cFieldTermVector = rb_define_class_under(cField, "TermVector", rb_cObject);
551
- rb_define_const(cFieldTermVector, "NO", INT2FIX(DF_TERM_VECTOR_NO));
552
- rb_define_const(cFieldTermVector, "YES", INT2FIX(DF_TERM_VECTOR_YES));
553
- rb_define_const(cFieldTermVector, "WITH_POSITIONS",
554
- INT2FIX(DF_TERM_VECTOR_WITH_POSITIONS));
555
- rb_define_const(cFieldTermVector, "WITH_OFFSETS",
556
- INT2FIX(DF_TERM_VECTOR_WITH_OFFSETS));
557
- rb_define_const(cFieldTermVector, "WITH_POSITIONS_OFFSETS",
558
- INT2FIX(DF_TERM_VECTOR_WITH_POSITIONS_OFFSETS));
559
-
560
- /* Document */
561
- cDocument = rb_define_class_under(mDocument, "Document", rb_cObject);
562
- rb_define_alloc_func(cDocument, frt_doc_alloc);
563
-
564
- rb_define_method(cDocument, "initialize", frt_doc_init, 0);
565
- rb_define_method(cDocument, "all_fields", frt_doc_all_fields, 0);
566
- rb_define_method(cDocument, "field_count", frt_doc_field_count, 0);
567
- rb_define_method(cDocument, "entry_count", frt_doc_entry_count, 0);
568
- rb_define_method(cDocument, "add_field", frt_doc_add_field, 1);
569
- rb_define_method(cDocument, "<<", frt_doc_add_field, 1);
570
- rb_define_method(cDocument, "remove_fields", frt_doc_remove_fields, 1);
571
- rb_define_method(cDocument, "remove_field", frt_doc_remove_field, 1);
572
- rb_define_method(cDocument, "field", frt_doc_field, 1);
573
- rb_define_method(cDocument, "fields", frt_doc_fields, 1);
574
- rb_define_method(cDocument, "values", frt_doc_values, 1);
575
- rb_define_method(cDocument, "binaries", frt_doc_binaries, 1);
576
- rb_define_method(cDocument, "[]", frt_doc_values, 1);
577
- rb_define_method(cDocument, "set", frt_doc_set, 2);
578
- rb_define_method(cDocument, "[]=", frt_doc_set, 2);
579
- rb_define_method(cDocument, "to_s", frt_doc_to_s, 0);
580
- rb_define_method(cDocument, "boost", frt_doc_get_boost, 0);
581
- rb_define_method(cDocument, "boost=", frt_doc_set_boost, 1);
582
- }