ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/CHANGELOG DELETED
@@ -1,9 +0,0 @@
1
- 20060316:
2
- * changed Token#term_text to Token#text
3
- * changed Token#position_increment to Term#pos_inc
4
- * changed order of args to Token.new. Now Term.new(text, start_offset,
5
- end_offset, pos_inc=1, type="text"). NOTE: type does nothing.
6
- * changed TermVectorOffsetInfo#start_offset to TermVectorOffsetInfo#start
7
- * changed TermVectorOffsetInfo#end_offset to TermVectorOffsetInfo#end
8
- * added :id_field option to Index::Index class.
9
-
data/ext/dummy.exe DELETED
Binary file
data/ext/field.c DELETED
@@ -1,408 +0,0 @@
1
- #include "index.h"
2
- #include <string.h>
3
-
4
- /****************************************************************************
5
- *
6
- * FieldInfo
7
- *
8
- ****************************************************************************/
9
-
10
- FieldInfo *fi_create(char *name, int number, bool is_indexed,
11
- bool store_tv, bool store_pos, bool store_offset, bool omit_norms)
12
- {
13
- FieldInfo *fi = ALLOC(FieldInfo);
14
- fi->name = estrdup(name);
15
- fi->number = number;
16
- fi->is_indexed = is_indexed;
17
- fi->store_tv = store_tv;
18
- fi->store_offset = store_offset;
19
- fi->store_pos = store_pos;
20
- fi->omit_norms = omit_norms;
21
- return fi;
22
- }
23
-
24
- void fi_destroy(FieldInfo *fi)
25
- {
26
- free(fi->name);
27
- free(fi);
28
- }
29
-
30
- /****************************************************************************
31
- *
32
- * FieldInfos
33
- *
34
- ****************************************************************************/
35
-
36
- FieldInfos *fis_create()
37
- {
38
- FieldInfos *fis = ALLOC(FieldInfos);
39
- fis->by_name = ht_create();
40
- fis->by_number = NULL;
41
- fis->fcnt = 0;
42
- return fis;
43
- }
44
-
45
- FieldInfos *fis_open(Store *store, char *filename)
46
- {
47
- FieldInfos *fis = fis_create();
48
- InStream *is = store->open_input(store, filename);
49
- TRY
50
- fis_read(fis, is);
51
- XFINALLY
52
- is_close(is);
53
- XENDTRY
54
- return fis;
55
- }
56
-
57
- void fis_destroy(FieldInfos *fis)
58
- {
59
- int i;
60
- for (i = 0; i < fis->fcnt; i++) {
61
- fi_destroy(fis->by_number[i]);
62
- }
63
- ht_destroy(fis->by_name);
64
- free(fis->by_number);
65
- free(fis);
66
- }
67
-
68
- FieldInfo *fis_add(FieldInfos *fis,
69
- char *name,
70
- bool is_indexed,
71
- bool store_tv,
72
- bool store_pos,
73
- bool store_offset,
74
- bool omit_norms)
75
- {
76
- FieldInfo *fi = ht_get(fis->by_name, name);
77
- if (fi == NULL) {
78
- fi = fi_create(name, fis->fcnt, is_indexed, store_tv,
79
- store_pos, store_offset, omit_norms);
80
- fis->fcnt++;
81
- REALLOC_N(fis->by_number, FieldInfo *, fis->fcnt);
82
-
83
- fis->by_number[fi->number] = fi;
84
- ht_set(fis->by_name, name, fi);
85
- } else {
86
- if (fi->is_indexed != is_indexed)
87
- fi->is_indexed = true; // once indexed, always index
88
- if (fi->store_tv != store_tv)
89
- fi->store_tv = true; // once vector, always vector
90
- if (fi->store_pos != store_pos)
91
- fi->store_pos = true; // once vector, always vector
92
- if (fi->store_offset != store_offset)
93
- fi->store_offset = true; // once vector, always vector
94
- if (fi->omit_norms != omit_norms)
95
- fi->omit_norms = false; // once kept, always keep
96
- }
97
- return fi;
98
- }
99
-
100
- void fis_add_fields(FieldInfos *fis,
101
- HashSet *field_names,
102
- bool is_indexed,
103
- bool store_tv,
104
- bool store_pos,
105
- bool store_offset,
106
- bool omit_norms)
107
- {
108
- int i;
109
- for (i = 0; i < field_names->size; i++) {
110
- fis_add(fis, field_names->elems[i], is_indexed, store_tv, store_pos,
111
- store_offset, omit_norms);
112
- }
113
- hs_destroy(field_names);
114
- }
115
-
116
- bool fis_has_vectors(FieldInfos *fis)
117
- {
118
- int i;
119
- for (i = 0; i < fis->fcnt; i++) {
120
- if (fis->by_number[i]->store_tv)
121
- return true;
122
- }
123
- return false;
124
- }
125
-
126
- FieldInfo *fis_get_fi(FieldInfos *fis, char *name)
127
- {
128
- return (FieldInfo *)ht_get(fis->by_name, name);
129
- }
130
-
131
- ullong fis_get_number(FieldInfos *fis, char *name)
132
- {
133
- FieldInfo *fi = (FieldInfo *)ht_get(fis->by_name, name);
134
- if (fi == NULL)
135
- return NOT_A_FIELD;
136
- else
137
- return fi->number;
138
- }
139
-
140
- #define IS_INDEXED 0x01
141
- #define STORE_TV 0x02
142
- #define STORE_POS 0x04
143
- #define STORE_OFFSET 0x08
144
- #define OMIT_NORMS 0x10
145
-
146
- int fi_field_info_byte(FieldInfo *fi)
147
- {
148
- int bits = 0x0;
149
- if (fi->is_indexed)
150
- bits |= IS_INDEXED;
151
- if (fi->store_tv)
152
- bits |= STORE_TV;
153
- if (fi->store_pos)
154
- bits |= STORE_POS;
155
- if (fi->store_offset)
156
- bits |= STORE_OFFSET;
157
- if (fi->omit_norms)
158
- bits |= OMIT_NORMS;
159
- return bits;
160
- }
161
-
162
- void fis_write(FieldInfos *fis, Store *store, char *segment, char *ext)
163
- {
164
- int i;
165
- FieldInfo *fi;
166
- char fname[SEGMENT_NAME_MAX_LENGTH];
167
- OutStream *os;
168
-
169
- strcpy(fname, segment);
170
- strcat(fname, ext);
171
- os = store->create_output(store, fname);
172
- TRY
173
- os_write_vint(os, fis->fcnt);
174
- for (i = 0; i < fis->fcnt; i++) {
175
- fi = fis->by_number[i];
176
- os_write_string(os, fi->name);
177
- os_write_vint(os, fi_field_info_byte(fi));
178
- }
179
- XFINALLY
180
- os_close(os);
181
- XENDTRY
182
- }
183
-
184
- FieldInfos *fis_read(FieldInfos *fis, InStream *is)
185
- {
186
- int i, size = (int)is_read_vint(is); /* read in the size */
187
- int bits, is_indexed, store_tv, store_pos, store_offset, omit_norms;
188
- char *name;
189
- for (i = 0; i < size; i++) {
190
- name = is_read_string(is);
191
- bits = is_read_byte(is);
192
- is_indexed = (bits & IS_INDEXED) != 0;
193
- store_tv = (bits & STORE_TV) != 0;
194
- store_pos = (bits & STORE_POS) != 0;
195
- store_offset = (bits & STORE_OFFSET) != 0;
196
- omit_norms = (bits & OMIT_NORMS) != 0;
197
- fis_add(fis, name, is_indexed, store_tv,
198
- store_pos, store_offset, omit_norms);
199
- free(name);
200
- }
201
- return fis;
202
- }
203
-
204
- FieldInfos *fis_add_doc(FieldInfos *fis, Document *doc)
205
- {
206
- int i;
207
- DocField *df;
208
- for (i = 0; i < doc->dfcnt; i++) {
209
- df = doc->df_arr[i];
210
- fis_add(fis, df->name, df->is_indexed, df->store_tv,
211
- df->store_pos, df->store_offset, df->omit_norms);
212
- }
213
- return fis;
214
- }
215
-
216
- /****************************************************************************
217
- *
218
- * FieldsReader
219
- *
220
- ****************************************************************************/
221
-
222
- FieldsReader *fr_open(Store *store, char *segment, FieldInfos *fis)
223
- {
224
- FieldsReader *fr = ALLOC(FieldsReader);
225
- InStream *iin;
226
- char buf[100];
227
- int slen = (int)strlen(segment);
228
- strcpy(buf, segment);
229
-
230
- fr->fis = fis;
231
- strcpy(buf+slen, ".fdt");
232
- fr->fields_in = store->open_input(store, buf);
233
- strcpy(buf+slen, ".fdx");
234
- iin = fr->index_in = store->open_input(store, buf);
235
- fr->len = iin->length_internal(iin)/8;
236
- return fr;
237
- }
238
-
239
- void fr_close(FieldsReader *fr)
240
- {
241
- is_close(fr->fields_in);
242
- is_close(fr->index_in);
243
- free(fr);
244
- }
245
-
246
- Document *fr_get_doc(FieldsReader *fr, int doc_num)
247
- {
248
- int i, bits, dlen;
249
- FieldInfo *fi;
250
- char *data;
251
- int store, index, stv;
252
- int is_compressed, is_tokenized, is_binary;
253
- int position, field_cnt, field_number;
254
- Document *doc = doc_create();
255
- InStream *iin = fr->index_in;
256
- InStream *fin = fr->fields_in;
257
-
258
- is_seek(iin, doc_num * 8);
259
- position = (int)is_read_long(iin);
260
- is_seek(fin, position);
261
- field_cnt = (int)is_read_vint(fin);
262
-
263
- for (i = 0; i < field_cnt; i++) {
264
- field_number = (int)is_read_vint(fin);
265
- fi = fr->fis->by_number[field_number];
266
-
267
- bits = is_read_byte(fin);
268
-
269
- is_compressed = (bits & FIELD_IS_COMPRESSED) != 0;
270
- is_tokenized = (bits & FIELD_IS_TOKENIZED) != 0;
271
- is_binary = (bits & FIELD_IS_BINARY) != 0;
272
-
273
- if (is_binary) {
274
- dlen = (int)is_read_vint(fin);
275
- data = ALLOC_N(char, dlen);
276
- is_read_bytes(fin, (uchar *)data, 0, dlen);
277
- if (is_compressed) {
278
- doc_add_field(doc, df_create_binary(fi->name, data, dlen, DF_STORE_COMPRESS));
279
- } else {
280
- doc_add_field(doc, df_create_binary(fi->name, data, dlen, DF_STORE_YES));
281
- }
282
- } else {
283
- store = DF_STORE_YES;
284
- if (!fi->is_indexed) {
285
- index = DF_INDEX_NO;
286
- } else if (is_tokenized) {
287
- index = DF_INDEX_TOKENIZED;
288
- } else if (fi->omit_norms) {
289
- index = DF_INDEX_NO_NORMS;
290
- } else {
291
- index = DF_INDEX_UNTOKENIZED;
292
- }
293
- data = NULL;
294
- if (is_compressed) {
295
- store = DF_STORE_COMPRESS;
296
- dlen = (int)is_read_vint(fin);
297
- data = ALLOC_N(char, (dlen + 1));
298
- data[dlen] = '\0';
299
- is_read_bytes(fin, (uchar *)data, 0, dlen);
300
- } else {
301
- data = is_read_string(fin);
302
- }
303
- stv = DF_TERM_VECTOR_NO;
304
- if (fi->store_tv) {
305
- if (fi->store_pos && fi->store_offset) {
306
- stv = DF_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
307
- } else if (fi->store_pos) {
308
- stv = DF_TERM_VECTOR_WITH_POSITIONS;
309
- } else if (fi->store_offset) {
310
- stv = DF_TERM_VECTOR_WITH_OFFSETS;
311
- } else {
312
- stv = DF_TERM_VECTOR_YES;
313
- }
314
- }
315
- doc_add_field(doc, df_create(fi->name, data, store, index, stv));
316
- }
317
- }
318
-
319
- return doc;
320
- }
321
-
322
- /****************************************************************************
323
- *
324
- * FieldsWriter
325
- *
326
- ****************************************************************************/
327
-
328
- FieldsWriter *fw_open(Store *store, char *segment, FieldInfos *fis)
329
- {
330
- FieldsWriter *fw = ALLOC(FieldsWriter);
331
- char buf[SEGMENT_NAME_MAX_LENGTH];
332
- int slen = (int)strlen(segment);
333
-
334
- strcpy(buf, segment);
335
-
336
- fw->fis = fis;
337
- strcpy(buf+slen, ".fdt");
338
- fw->fields_out = store->create_output(store, buf);
339
- strcpy(buf+slen, ".fdx");
340
- fw->index_out = store->create_output(store, buf);
341
- return fw;
342
- }
343
-
344
- void fw_close(FieldsWriter *fw)
345
- {
346
- os_close(fw->fields_out);
347
- os_close(fw->index_out);
348
- free(fw);
349
- }
350
-
351
- void save_data(OutStream *fout, char *data, int dlen)
352
- {
353
- os_write_vint(fout, dlen);
354
- os_write_bytes(fout, (uchar *)data, dlen);
355
- }
356
-
357
- void fw_add_doc(FieldsWriter *fw, Document *doc)
358
- {
359
- int i, bits;
360
- DocField *df;
361
- char *data;
362
- int stored_count = 0;
363
- OutStream *fout = fw->fields_out, *iout = fw->index_out;
364
-
365
- os_write_long(iout, os_pos(fout));
366
-
367
- for (i = 0; i < doc->dfcnt; i++) {
368
- if (doc->df_arr[i]->is_stored)
369
- stored_count++;
370
- }
371
- os_write_vint(fout, stored_count);
372
-
373
- for (i = 0; i < doc->dfcnt; i++) {
374
- df = doc->df_arr[i];
375
- if (df->is_stored) {
376
- os_write_vint(fout, ((FieldInfo *)ht_get(fw->fis->by_name, df->name))->number);
377
-
378
- bits = 0;
379
- if (df->is_tokenized) {
380
- bits |= FIELD_IS_TOKENIZED;
381
- }
382
- if (df->is_binary) {
383
- bits |= FIELD_IS_BINARY;
384
- }
385
- if (df->is_compressed) {
386
- bits |= FIELD_IS_COMPRESSED;
387
- }
388
- os_write_byte(fout, bits);
389
-
390
- data = NULL;
391
- if (df->is_compressed) {
392
- /* Not compressing just yet but we'll save it anyway */
393
- if (df->is_binary) {
394
- save_data(fout, df->data, df->blen);
395
- } else {
396
- os_write_string(fout, df->data);
397
- }
398
- } else {
399
- if (df->is_binary) {
400
- save_data(fout, df->data, df->blen);
401
- } else {
402
- os_write_string(fout, df->data);
403
- }
404
- }
405
- }
406
- }
407
- }
408
-
data/ext/frtio.h DELETED
@@ -1,13 +0,0 @@
1
- #ifndef FRT_IO_H
2
- #define FRT_IO_H
3
-
4
- extern char *join_path(char *buf, const char *base, const char *filename);
5
- extern bool exists(char *path);
6
- extern int fcount(char *path);
7
- extern void dir_each(char *path, void (*func)(char *fname, void *arg), void *arg);
8
- extern void fs_clear_locks(Store *store);
9
- extern void fs_clear(Store *store);
10
- extern void fs_clear_all(Store *store);
11
-
12
- #endif
13
-
data/ext/inc/except.h DELETED
@@ -1,90 +0,0 @@
1
- #ifndef FRT_EXCEPT_H
2
- #define FRT_EXCEPT_H
3
-
4
- #include <setjmp.h>
5
- #include <ruby.h>
6
-
7
- #define BODY 0
8
- #define FINALLY -1
9
- #define EXCEPTION 1
10
- #define IO_ERROR 2
11
- #define ARG_ERROR 3
12
- #define EOF_ERROR 4
13
- #define UNSUPPORTED_ERROR 5
14
- #define STATE_ERROR 6
15
- #define PARSE_ERROR 7
16
- #define MEM_ERROR 8
17
-
18
- typedef struct xcontext_t {
19
- jmp_buf jbuf;
20
- struct xcontext_t *next;
21
- char *msg;
22
- volatile int excode;
23
- int handled : 1;
24
- int in_finally : 1;
25
- } xcontext_t;
26
-
27
- RUBY_EXTERN int rb_thread_critical;
28
- extern xcontext_t *xtop_context;
29
-
30
- #define TRY\
31
- do {\
32
- xcontext_t xcontext;\
33
- rb_thread_critical = Qtrue;\
34
- xcontext.next = xtop_context;\
35
- xtop_context = &xcontext;\
36
- xcontext.handled = true;\
37
- xcontext.in_finally = false;\
38
- switch (setjmp(xcontext.jbuf)) {\
39
- case BODY:
40
-
41
-
42
- #define XENDTRY\
43
- }\
44
- xtop_context = xcontext.next;\
45
- if (!xcontext.handled) {\
46
- RAISE(xcontext.excode, xcontext.msg);\
47
- }\
48
- rb_thread_critical = 0;\
49
- } while (0);
50
-
51
- #define ENDTRY\
52
- }\
53
- if (!xcontext.in_finally) {\
54
- xtop_context = xcontext.next;\
55
- if (!xcontext.handled) {\
56
- RAISE(xcontext.excode, xcontext.msg);\
57
- }\
58
- xcontext.in_finally = 1;\
59
- longjmp(xcontext.jbuf, FINALLY);\
60
- }\
61
- rb_thread_critical = 0;\
62
- } while (0);
63
-
64
- #define XFINALLY default: xcontext.in_finally = 1;
65
-
66
- #define XCATCHALL break; default: xcontext.in_finally = 1;
67
-
68
- //fprintf(stderr,"Error occured in %s, %d: %s\n", __FILE__, __LINE__, __func__);
69
- #define RAISE(xexcode, xmsg) \
70
- do {\
71
- if (!xtop_context) {\
72
- eprintf(EXCEPTION_CODE, "Error: exception %d not handled: %s", xexcode, xmsg);\
73
- } else if (!xtop_context->in_finally) {\
74
- xtop_context->msg = xmsg;\
75
- xtop_context->excode = xexcode;\
76
- xtop_context->handled = false;\
77
- longjmp(xtop_context->jbuf, xexcode);\
78
- } else if (xtop_context->handled) {\
79
- xtop_context->msg = xmsg;\
80
- xtop_context->excode = xexcode;\
81
- xtop_context->handled = false;\
82
- }\
83
- } while (0)
84
-
85
- #define HANDLED() xcontext.handled = 1 /* true */
86
-
87
- extern char * const UNSUPPORTED_ERROR_MSG;
88
- extern char * const EOF_ERROR_MSG;
89
-
90
- #endif