isomorfeus-ferret 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,185 @@
1
+ #include "frt_index.h"
2
+ #include "testhelper.h"
3
+ #include <limits.h>
4
+ #include "test.h"
5
+
6
+ static const char *content_f = "content"; /* field name used by add_doc() for the text field and by the set_norm call in the test */
7
+ static const char *id_f = "id"; /* field name used by add_doc() for the per-document id */
8
+ const FrtConfig lucene_config = { /* writer configuration passed to every frt_iw_open() call in this file */
9
+ 0x100000, /* chunk size: 0x100000 bytes (1 MiB) */
10
+ 0x1000000, /* maximum memory used for buffering: 0x1000000 bytes (16 MiB) */
11
+ FRT_INDEX_INTERVAL, /* index interval */
12
+ FRT_SKIP_INTERVAL, /* skip interval */
13
+ 10, /* merge factor */
14
+ 10, /* max_buffered_docs */
15
+ INT_MAX, /* max_merged_docs */
16
+ 10000, /* maximum field length (number of terms) */
17
+ true /* use compound file */
18
+ };
19
+
20
+
21
+ static FrtFieldInfos *prep_fis()
22
+ {
23
+ return frt_fis_new(FRT_STORE_NO, FRT_INDEX_YES, FRT_TERM_VECTOR_NO);
24
+ }
25
+
26
+ static void create_index(FrtStore *store)
27
+ {
28
+ FrtFieldInfos *fis = prep_fis();
29
+ frt_index_create(store, fis);
30
+ frt_fis_deref(fis);
31
+ }
32
+
33
+ /*
34
+ static FrtIndexWriter *create_iw(FrtStore *store)
35
+ {
36
+ create_index(store);
37
+ return frt_iw_open(store, frt_whitespace_analyzer_new(false), &frt_default_config);
38
+ }
39
+
40
+ static FrtIndexWriter *create_iw_conf(FrtStore *store, int max_buffered_docs,
41
+ int merge_factor)
42
+ {
43
+ FrtConfig config = frt_default_config;
44
+ config.max_buffered_docs = max_buffered_docs;
45
+ config.merge_factor = merge_factor;
46
+ create_index(store);
47
+ return frt_iw_open(store, frt_whitespace_analyzer_new(false), &config);
48
+ }
49
+ */
50
+
51
+ static FrtIndexWriter *create_iw_lucene(FrtStore *store)
52
+ {
53
+ create_index(store);
54
+ return frt_iw_open(store, frt_whitespace_analyzer_new(false), &lucene_config);
55
+ }
56
+
57
+ static void add_doc(FrtIndexWriter *iw, int id)
58
+ {
59
+ FrtDocument *doc = frt_doc_new();
60
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(rb_intern(content_f)),
61
+ frt_estrdup("aaa")))->destroy_data = true;
62
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(rb_intern(id_f)),
63
+ frt_strfmt("%d", id)))->destroy_data = true;
64
+ frt_iw_add_doc(iw, doc);
65
+ frt_doc_destroy(doc);
66
+ }
67
+
68
+ static void add_docs(FrtIndexWriter *iw, int count)
69
+ {
70
+ int i;
71
+ for (i = 0; i < count; i++) {
72
+ add_doc(iw, i);
73
+ }
74
+ }
75
+
76
+ static void copy_file(FrtStore *store, const char *src, const char *dest)
77
+ {
78
+ FrtInStream *is = store->open_input(store, src);
79
+ FrtOutStream *os = store->new_output(store, dest);
80
+ frt_is2os_copy_bytes(is, os, frt_is_length(is));
81
+ frt_is_close(is);
82
+ frt_os_close(os);
83
+ }
84
+
85
+ /*
86
+ * Verify we can read the pre-XXX file format, do searches
87
+ * against it, and add documents to it.
88
+ */
89
+ static void test_delete_leftover_files(TestCase *tc, void *data)
90
+ {
91
+ FrtStore *store = (FrtStore *)data;
92
+ FrtIndexWriter *iw = create_iw_lucene(store);
93
+ FrtIndexReader *ir;
94
+ char *store_before, *store_after;
95
+ add_docs(iw, 35);
96
+ frt_iw_close(iw);
97
+
98
+ /* Delete one doc so we get a .del file: */
99
+ ir = frt_ir_open(store);
100
+ frt_ir_delete_doc(ir, 7);
101
+ Aiequal(1, ir->max_doc(ir) - ir->num_docs(ir));
102
+
103
+ /* Set one norm so we get a .s0 file: */
104
+ frt_ir_set_norm(ir, 21, rb_intern(content_f), 12);
105
+ frt_ir_close(ir);
106
+ store_before = frt_store_to_s(store);
107
+
108
+ /* Create a bogus separate norms file for a
109
+ * segment/field that actually has a separate norms file
110
+ * already: */
111
+ copy_file(store, "_2_1.s0", "_2_2.s0");
112
+
113
+ /* Create a bogus separate norms file for a
114
+ * segment/field that actually has a separate norms file
115
+ * already, using the "not compound file" extension: */
116
+ copy_file(store, "_2_1.s0", "_2_2.f0");
117
+
118
+ /* Create a bogus separate norms file for a
119
+ * segment/field that does not have a separate norms
120
+ * file already: */
121
+ copy_file(store, "_2_1.s0", "_1_1.s0");
122
+
123
+ /* Create a bogus separate norms file for a
124
+ * segment/field that does not have a separate norms
125
+ * file already using the "not compound file" extension: */
126
+ copy_file(store, "_2_1.s0", "_1_1.f0");
127
+
128
+ /* Create a bogus separate del file for a
129
+ * segment that already has a separate del file: */
130
+ copy_file(store, "_0_0.del", "_0_1.del");
131
+
132
+ /* Create a bogus separate del file for a
133
+ * segment that does not yet have a separate del file: */
134
+ copy_file(store, "_0_0.del", "_1_1.del");
135
+
136
+ /* Create a bogus separate del file for a
137
+ * non-existent segment: */
138
+ copy_file(store, "_0_0.del", "_188_1.del");
139
+
140
+ /* Create a bogus segment file: */
141
+ copy_file(store, "_0.cfs", "_188.cfs");
142
+
143
+ /* Create a bogus frq file when the CFS already exists: */
144
+ copy_file(store, "_0.cfs", "_0.frq");
145
+
146
+ /* Create a bogus frq file when the CFS already exists: */
147
+ copy_file(store, "_0.cfs", "_0.frq");
148
+ copy_file(store, "_0.cfs", "_0.prx");
149
+ copy_file(store, "_0.cfs", "_0.fdx");
150
+ copy_file(store, "_0.cfs", "_0.fdt");
151
+ copy_file(store, "_0.cfs", "_0.tfx");
152
+ copy_file(store, "_0.cfs", "_0.tix");
153
+ copy_file(store, "_0.cfs", "_0.tis");
154
+
155
+ /* Create some old segments file: */
156
+ copy_file(store, "segments_5", "segments");
157
+ copy_file(store, "segments_5", "segments_2");
158
+
159
+
160
+ /* Open & close a writer: should delete the above files and nothing more: */
161
+ frt_iw_close(frt_iw_open(store, frt_whitespace_analyzer_new(false), &lucene_config));
162
+
163
+ store_after = frt_store_to_s(store);
164
+
165
+ Asequal(store_before, store_after);
166
+ free(store_before);
167
+ free(store_after);
168
+ }
169
+
170
+ /***************************************************************************
171
+ *
172
+ * IndexFileDeleterSuite
173
+ *
174
+ ***************************************************************************/
175
+
176
+ TestSuite *ts_file_deleter(TestSuite *suite)
177
+ {
178
+ FrtStore *store = frt_open_ram_store();
179
+ suite = ADD_SUITE(suite);
180
+
181
+ tst_run_test(suite, test_delete_leftover_files, store);
182
+
183
+ frt_store_deref(store);
184
+ return suite;
185
+ }
@@ -0,0 +1,331 @@
1
+ #include "testhelper.h"
2
+ #include "frt_search.h"
3
+ #include "test.h"
4
+
5
+ #define FILTER_DOCS_SIZE 10 /* number of rows in the data table indexed by prepare_filter_index() */
6
+ #define ARRAY_SIZE 20 /* capacity of the expected-hits array in check_filtered_hits() */
7
+
8
+ struct FilterData { /* one row of test data; each member becomes one field of a document */
9
+ const char *num; /* value of the "num" field */
10
+ const char *date; /* value of the "date" field */
11
+ const char *flipflop; /* value of the "flipflop" field ("on" or "off" in the data table) */
12
+ };
13
+
14
+ static FrtSymbol num, date, flipflop; /* field symbols; assigned via rb_intern() in prepare_filter_index() */
15
+
16
+ void prepare_filter_index(FrtStore *store)
17
+ {
18
+ int i;
19
+ FrtIndexWriter *iw;
20
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_NO);
21
+
22
+ num = rb_intern("num");
23
+ date = rb_intern("date");
24
+ flipflop = rb_intern("flipflop");
25
+
26
+ struct FilterData data[FILTER_DOCS_SIZE] = {
27
+ {"0", "20040601", "on"},
28
+ {"1", "20041001", "off"},
29
+ {"2", "20051101", "on"},
30
+ {"3", "20041201", "off"},
31
+ {"4", "20051101", "on"},
32
+ {"5", "20041201", "off"},
33
+ {"6", "20050101", "on"},
34
+ {"7", "20040701", "off"},
35
+ {"8", "20050301", "on"},
36
+ {"9", "20050401", "off"}
37
+ };
38
+
39
+ frt_index_create(store, fis);
40
+ frt_fis_deref(fis);
41
+
42
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), NULL);
43
+ for (i = 0; i < FILTER_DOCS_SIZE; i++) {
44
+ FrtDocument *doc = frt_doc_new();
45
+ doc->boost = (float)(i+1);
46
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(num), (char *)data[i].num));
47
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(date), (char *)data[i].date));
48
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(flipflop), (char *)data[i].flipflop));
49
+ frt_iw_add_doc(iw, doc);
50
+ frt_doc_destroy(doc);
51
+ }
52
+ frt_iw_close(iw);
53
+ return;
54
+ }
55
+
56
+ static void check_filtered_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *query, FrtFilter *f, FrtPostFilter *post_filter, const char *expected_hits, int top)
57
+ {
58
+ static int num_array[ARRAY_SIZE];
59
+ int i;
60
+ int total_hits = s2l(expected_hits, num_array);
61
+ FrtTopDocs *top_docs = frt_searcher_search(searcher, query, 0, total_hits + 1,
62
+ f, NULL, post_filter);
63
+ Aiequal(total_hits, top_docs->total_hits);
64
+ Aiequal(total_hits, top_docs->size);
65
+
66
+ if ((top >= 0) && top_docs->size) {
67
+ Aiequal(top, top_docs->hits[0]->doc);
68
+ }
69
+
70
+ for (i = 0; i < top_docs->size; i++) {
71
+ FrtHit *hit = top_docs->hits[i];
72
+ char buf[1000];
73
+ sprintf(buf, "doc %d was found unexpectedly", hit->doc);
74
+ Assert(frt_ary_includes(num_array, total_hits, hit->doc), buf);
75
+ /* only check the explanation if we got the correct docs. Obviously we
76
+ * might want to remove this to visually check the explanations */
77
+ if (total_hits == top_docs->total_hits) {
78
+ FrtExplanation *e = searcher->explain(searcher, query, hit->doc);
79
+ float escore = e->value;
80
+ if (post_filter) {
81
+ escore *= post_filter->filter_func(hit->doc, escore, searcher,
82
+ post_filter->arg);
83
+ }
84
+ Afequal(hit->score, escore);
85
+ frt_expl_destroy(e);
86
+ }
87
+ }
88
+ frt_td_destroy(top_docs);
89
+ }
90
+
91
/*
 * Assert that the string returned by mfilt->to_s(mfilt) equals mstr, then
 * free the returned string.
 *
 * Both arguments are parenthesized in the expansion so that expressions
 * can be passed safely. Note that mfilt is evaluated twice, so it must not
 * have side effects. The macro relies on a TestCase *tc being in scope
 * wherever the assert macro requires one.
 */
#define TEST_TO_S(mstr, mfilt) \
    do {\
        char *fstr = (mfilt)->to_s(mfilt);\
        Asequal((mstr), fstr);\
        free(fstr);\
    } while (0)
97
+
98
+ static void test_range_filter(TestCase *tc, void *data)
99
+ {
100
+ FrtSearcher *searcher = (FrtSearcher *)data;
101
+ FrtQuery *q = frt_maq_new();
102
+ FrtFilter *rf = frt_rfilt_new(num, "2", "6", true, true);
103
+ check_filtered_hits(tc, searcher, q, rf, NULL, "2,3,4,5,6", -1);
104
+ TEST_TO_S("RangeFilter< num:[2 6] >", rf);
105
+ frt_filt_deref(rf);
106
+ rf = frt_rfilt_new(num, "2", "6", true, false);
107
+ check_filtered_hits(tc, searcher, q, rf, NULL, "2,3,4,5", -1);
108
+ TEST_TO_S("RangeFilter< num:[2 6} >", rf);
109
+ frt_filt_deref(rf);
110
+ rf = frt_rfilt_new(num, "2", "6", false, true);
111
+ check_filtered_hits(tc, searcher, q, rf, NULL, "3,4,5,6", -1);
112
+ TEST_TO_S("RangeFilter< num:{2 6] >", rf);
113
+ frt_filt_deref(rf);
114
+ rf = frt_rfilt_new(num, "2", "6", false, false);
115
+ check_filtered_hits(tc, searcher, q, rf, NULL, "3,4,5", -1);
116
+ TEST_TO_S("RangeFilter< num:{2 6} >", rf);
117
+ frt_filt_deref(rf);
118
+ rf = frt_rfilt_new(num, "6", NULL, true, false);
119
+ check_filtered_hits(tc, searcher, q, rf, NULL, "6,7,8,9", -1);
120
+ TEST_TO_S("RangeFilter< num:[6> >", rf);
121
+ frt_filt_deref(rf);
122
+ rf = frt_rfilt_new(num, "6", NULL, false, false);
123
+ check_filtered_hits(tc, searcher, q, rf, NULL, "7,8,9", -1);
124
+ TEST_TO_S("RangeFilter< num:{6> >", rf);
125
+ frt_filt_deref(rf);
126
+ rf = frt_rfilt_new(num, NULL, "2", false, true);
127
+ check_filtered_hits(tc, searcher, q, rf, NULL, "0,1,2", -1);
128
+ TEST_TO_S("RangeFilter< num:<2] >", rf);
129
+ frt_filt_deref(rf);
130
+ rf = frt_rfilt_new(num, NULL, "2", false, false);
131
+ check_filtered_hits(tc, searcher, q, rf, NULL, "0,1", -1);
132
+ TEST_TO_S("RangeFilter< num:<2} >", rf);
133
+ frt_filt_deref(rf);
134
+ frt_q_deref(q);
135
+ }
136
+
137
+ static void test_range_filter_hash(TestCase *tc, void *data)
138
+ {
139
+ FrtFilter *f1, *f2;
140
+ (void)data;
141
+ f1 = frt_rfilt_new(date, "20051006", "20051010", true, true);
142
+ f2 = frt_rfilt_new(date, "20051006", "20051010", true, true);
143
+
144
+ Assert(frt_filt_eq(f1, f1), "Test same queries are equal");
145
+ Aiequal(frt_filt_hash(f1), frt_filt_hash(f2));
146
+ Assert(frt_filt_eq(f1, f2), "Queries are equal");
147
+ frt_filt_deref(f2);
148
+
149
+ f2 = frt_rfilt_new(date, "20051006", "20051010", true, false);
150
+ Assert(frt_filt_hash(f1) != frt_filt_hash(f2), "Upper bound include differs");
151
+ Assert(!frt_filt_eq(f1, f2), "Upper bound include differs");
152
+ frt_filt_deref(f2);
153
+
154
+ f2 = frt_rfilt_new(date, "20051006", "20051010", false, true);
155
+ Assert(frt_filt_hash(f1) != frt_filt_hash(f2), "Lower bound include differs");
156
+ Assert(!frt_filt_eq(f1, f2), "Lower bound include differs");
157
+ frt_filt_deref(f2);
158
+
159
+ f2 = frt_rfilt_new(date, "20051006", "20051011", true, true);
160
+ Assert(frt_filt_hash(f1) != frt_filt_hash(f2), "Upper bound differs");
161
+ Assert(!frt_filt_eq(f1, f2), "Upper bound differs");
162
+ frt_filt_deref(f2);
163
+
164
+ f2 = frt_rfilt_new(date, "20051005", "20051010", true, true);
165
+ Assert(frt_filt_hash(f1) != frt_filt_hash(f2), "Lower bound differs");
166
+ Assert(!frt_filt_eq(f1, f2), "Lower bound differs");
167
+ frt_filt_deref(f2);
168
+
169
+ f2 = frt_rfilt_new(date, "20051006", NULL, true, false);
170
+ Assert(frt_filt_hash(f1) != frt_filt_hash(f2), "Upper bound is NULL");
171
+ Assert(!frt_filt_eq(f1, f2), "Upper bound is NULL");
172
+ frt_filt_deref(f2);
173
+
174
+ f2 = frt_rfilt_new(date, NULL, "20051010", false, true);
175
+ Assert(frt_filt_hash(f1) != frt_filt_hash(f2), "Lower bound is NULL");
176
+ Assert(!frt_filt_eq(f1, f2), "Lower bound is NULL");
177
+ frt_filt_deref(f2);
178
+
179
+ f2 = frt_rfilt_new(flipflop, "20051006", "20051010", true, true);
180
+ Assert(frt_filt_hash(f1) != frt_filt_hash(f2), "Field differs");
181
+ Assert(!frt_filt_eq(f1, f2), "Field differs");
182
+ frt_filt_deref(f2);
183
+ frt_filt_deref(f1);
184
+
185
+ f1 = frt_rfilt_new(date, NULL, "20051010", false, true);
186
+ f2 = frt_rfilt_new(date, NULL, "20051010", false, true);
187
+ Aiequal(frt_filt_hash(f1), frt_filt_hash(f2));
188
+ Assert(frt_filt_eq(f1, f2), "Queries are equal");
189
+ frt_filt_deref(f2);
190
+ frt_filt_deref(f1);
191
+ }
192
+
193
+ static void test_query_filter(TestCase *tc, void *data)
194
+ {
195
+ FrtSearcher *searcher = (FrtSearcher *)data;
196
+ FrtQuery *bq;
197
+ FrtFilter *qf;
198
+ FrtQuery *q = frt_maq_new();
199
+
200
+ qf = frt_qfilt_new_nr(frt_tq_new(flipflop, "on"));
201
+ TEST_TO_S("QueryFilter< flipflop:on >", qf);
202
+ check_filtered_hits(tc, searcher, q, qf, NULL, "0,2,4,6,8", -1);
203
+ frt_filt_deref(qf);
204
+
205
+ bq = frt_bq_new(false);
206
+ frt_bq_add_query_nr(bq, frt_tq_new(date, "20051101"), FRT_BC_SHOULD);
207
+ frt_bq_add_query_nr(bq, frt_tq_new(date, "20041201"), FRT_BC_SHOULD);
208
+ qf = frt_qfilt_new_nr(bq);
209
+ check_filtered_hits(tc, searcher, q, qf, NULL, "2,3,4,5", -1);
210
+ TEST_TO_S("QueryFilter< date:20051101 date:20041201 >", qf);
211
+ frt_filt_deref(qf);
212
+
213
+ frt_q_deref(q);
214
+ }
215
+
216
+ static void test_query_filter_hash(TestCase *tc, void *data)
217
+ {
218
+ FrtFilter *f1, *f2;
219
+ (void)data;
220
+ f1 = frt_qfilt_new_nr(frt_tq_new(rb_intern("A"), "a"));
221
+ f2 = frt_qfilt_new_nr(frt_tq_new(rb_intern("A"), "a"));
222
+
223
+ Aiequal(frt_filt_hash(f1), frt_filt_hash(f2));
224
+ Assert(frt_filt_eq(f1, f2), "Queries are equal");
225
+ Assert(frt_filt_eq(f1, f1), "Queries are equal");
226
+ frt_filt_deref(f2);
227
+
228
+ f2 = frt_qfilt_new_nr(frt_tq_new(rb_intern("A"), "b"));
229
+ Assert(frt_filt_hash(f1) != frt_filt_hash(f2), "texts differ");
230
+ Assert(!frt_filt_eq(f1, f2), "texts differ");
231
+ frt_filt_deref(f2);
232
+
233
+ f2 = frt_qfilt_new_nr(frt_tq_new(rb_intern("B"), "a"));
234
+ Assert(frt_filt_hash(f1) != frt_filt_hash(f2), "fields differ");
235
+ Assert(!frt_filt_eq(f1, f2), "fields differ");
236
+ frt_filt_deref(f2);
237
+
238
+ frt_filt_deref(f1);
239
+ }
240
+
241
+ static float odd_number_filter(int doc_num, float score, FrtSearcher *sea, void *arg)
242
+ {
243
+ float is_ok = 0.0;
244
+ FrtLazyDoc *lazy_doc = frt_searcher_get_lazy_doc(sea, doc_num);
245
+ FrtLazyDocField *lazy_df = frt_lazy_doc_get(lazy_doc, rb_intern("num"));
246
+ char *num = frt_lazy_df_get_data(lazy_df, 0);
247
+ (void)score;
248
+ (void)arg;
249
+
250
+ if ((atoi(num) % 2) == 0) {
251
+ is_ok = 1.0;
252
+ }
253
+
254
+ frt_lazy_doc_close(lazy_doc);
255
+ return is_ok;
256
+ }
257
+
258
+ static float distance_filter(int doc_num, float score, FrtSearcher *sea, void *arg)
259
+ {
260
+ int start_point = *((int *)arg);
261
+ float distance = 0.0;
262
+ FrtLazyDoc *lazy_doc = frt_searcher_get_lazy_doc(sea, doc_num);
263
+ FrtLazyDocField *lazy_df = frt_lazy_doc_get(lazy_doc, rb_intern("num"));
264
+ char *num = frt_lazy_df_get_data(lazy_df, 0);
265
+ (void)score;
266
+
267
+ distance = 1.0/(1 + (start_point - atoi(num)) * (start_point - atoi(num)));
268
+
269
+ frt_lazy_doc_close(lazy_doc);
270
+ return distance;
271
+ }
272
+
273
+ static void test_filter_func(TestCase *tc, void *data)
274
+ {
275
+ FrtSearcher *searcher = (FrtSearcher *)data;
276
+ FrtQuery *q = frt_maq_new();
277
+ FrtFilter *rf = frt_rfilt_new(num, "2", "6", true, true);
278
+ FrtPostFilter odd_filter;
279
+ odd_filter.filter_func = odd_number_filter;
280
+ odd_filter.arg = NULL;
281
+
282
+ check_filtered_hits(tc, searcher, q, NULL,
283
+ &odd_filter, "0,2,4,6,8", -1);
284
+ check_filtered_hits(tc, searcher, q, rf,
285
+ &odd_filter, "2,4,6", -1);
286
+ frt_filt_deref(rf);
287
+ frt_q_deref(q);
288
+ }
289
+
290
+ static void test_score_altering_filter_func(TestCase *tc, void *data)
291
+ {
292
+ FrtSearcher *searcher = (FrtSearcher *)data;
293
+ FrtQuery *q = frt_maq_new();
294
+ FrtFilter *rf = frt_rfilt_new(num, "4", "8", true, true);
295
+ int start_point = 7;
296
+ FrtPostFilter dist_filter;
297
+ dist_filter.filter_func = &distance_filter;
298
+ dist_filter.arg = &start_point;
299
+
300
+ check_filtered_hits(tc, searcher, q, NULL,
301
+ &dist_filter, "7,6,8,5,9,4,3,2,1,0", -1);
302
+ check_filtered_hits(tc, searcher, q, rf,
303
+ &dist_filter, "7,6,8,5,4", -1);
304
+ frt_filt_deref(rf);
305
+ frt_q_deref(q);
306
+ }
307
+
308
+ TestSuite *ts_filter(TestSuite *suite)
309
+ {
310
+ FrtStore *store;
311
+ FrtIndexReader *ir;
312
+ FrtSearcher *searcher;
313
+
314
+ suite = ADD_SUITE(suite);
315
+
316
+ store = frt_open_ram_store();
317
+ prepare_filter_index(store);
318
+ ir = frt_ir_open(store);
319
+ searcher = frt_isea_new(ir);
320
+
321
+ tst_run_test(suite, test_range_filter, (void *)searcher);
322
+ tst_run_test(suite, test_range_filter_hash, NULL);
323
+ tst_run_test(suite, test_query_filter, (void *)searcher);
324
+ tst_run_test(suite, test_query_filter_hash, NULL);
325
+ tst_run_test(suite, test_filter_func, searcher);
326
+ tst_run_test(suite, test_score_altering_filter_func, searcher);
327
+
328
+ frt_store_deref(store);
329
+ searcher->close(searcher);
330
+ return suite;
331
+ }
@@ -0,0 +1,25 @@
1
+ #include "frt_store.h"
2
+ #include "test_store.h"
3
+ #include "test.h"
4
+
5
+ /**
6
+ * Test a FileSystem store
7
+ */
8
+ TestSuite *ts_fs_store(TestSuite *suite)
9
+ {
10
+
11
+ #if defined POSH_OS_WIN32 || defined POSH_OS_WIN64
12
+ FrtStore *store = frt_open_fs_store(".\\test\\testdir\\store");
13
+ #else
14
+ FrtStore *store = frt_open_fs_store("./test/testdir/store");
15
+ #endif
16
+ store->clear(store);
17
+
18
+ suite = ADD_SUITE(suite);
19
+
20
+ create_test_store_suite(suite, store);
21
+
22
+ frt_store_deref(store);
23
+
24
+ return suite;
25
+ }