isomorfeus-ferret 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222)
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,2323 @@
1
+ #include "frt_index.h"
2
+ #include "testhelper.h"
3
+ #include "test.h"
4
+
5
/* Field-name symbols shared by the document builders in this file; they are
 * passed to frt_df_new() to name the fields of the test documents.
 * NOTE(review): where these are assigned is not visible in this part of the
 * file -- confirm they are initialised before any builder runs. */
+ static FrtSymbol body, title, text, author, year, changing_field, compressed_field, tag;
6
+
7
+ static FrtFieldInfos *prep_all_fis()
8
+ {
9
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_NO, FRT_INDEX_YES, FRT_TERM_VECTOR_NO);
10
+ frt_fis_add_field(fis, frt_fi_new(rb_intern("tv"), FRT_STORE_NO, FRT_INDEX_YES, FRT_TERM_VECTOR_YES));
11
+ frt_fis_add_field(fis, frt_fi_new(rb_intern("tv un-t"), FRT_STORE_NO, FRT_INDEX_UNTOKENIZED,
12
+ FRT_TERM_VECTOR_YES));
13
+ frt_fis_add_field(fis, frt_fi_new(rb_intern("tv+offsets"), FRT_STORE_NO, FRT_INDEX_YES,
14
+ FRT_TERM_VECTOR_WITH_OFFSETS));
15
+ frt_fis_add_field(fis, frt_fi_new(rb_intern("tv+offsets un-t"), FRT_STORE_NO, FRT_INDEX_UNTOKENIZED,
16
+ FRT_TERM_VECTOR_WITH_OFFSETS));
17
+ return fis;
18
+
19
+ }
20
+
21
+ static void destroy_docs(FrtDocument **docs, int len)
22
+ {
23
+ int i;
24
+ for (i = 0; i < len; i++) {
25
+ frt_doc_destroy(docs[i]);
26
+ }
27
+ free(docs);
28
+ }
29
+
30
+ static FrtFieldInfos *prep_book_fis()
31
+ {
32
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES,
33
+ FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
34
+ frt_fis_add_field(fis, frt_fi_new(rb_intern("year"), FRT_STORE_YES, FRT_INDEX_NO, FRT_TERM_VECTOR_NO));
35
+ return fis;
36
+
37
+ }
38
+
39
+ FrtDocument *prep_book()
40
+ {
41
+ FrtDocument *doc = frt_doc_new();
42
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(author),
43
+ frt_estrdup("P.H. Newby")))->destroy_data = true;
44
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(title),
45
+ frt_estrdup("Something To Answer For")))->destroy_data = true;
46
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(year),
47
+ frt_estrdup("1969")))->destroy_data = true;
48
+ return doc;
49
+ }
50
+
51
+ #define BOOK_LIST_LENGTH 37
52
+ FrtDocument **prep_book_list()
53
+ {
54
+ FrtDocument **docs = FRT_ALLOC_N(FrtDocument *, BOOK_LIST_LENGTH);
55
+ docs[0] = frt_doc_new();
56
+ frt_doc_add_field(docs[0], frt_df_add_data(frt_df_new(author),
57
+ frt_estrdup("P.H. Newby")))->destroy_data = true;
58
+ frt_doc_add_field(docs[0], frt_df_add_data(frt_df_new(title),
59
+ frt_estrdup("Something To Answer For")))->destroy_data = true;
60
+ frt_doc_add_field(docs[0], frt_df_add_data(frt_df_new(year),
61
+ frt_estrdup("1969")))->destroy_data = true;
62
+ docs[1] = frt_doc_new();
63
+ frt_doc_add_field(docs[1], frt_df_add_data(frt_df_new(author),
64
+ frt_estrdup("Bernice Rubens")))->destroy_data = true;
65
+ frt_doc_add_field(docs[1], frt_df_add_data(frt_df_new(title),
66
+ frt_estrdup("The Elected Member")))->destroy_data = true;
67
+ frt_doc_add_field(docs[1], frt_df_add_data(frt_df_new(year),
68
+ frt_estrdup("1970")))->destroy_data = true;
69
+ docs[2] = frt_doc_new();
70
+ frt_doc_add_field(docs[2], frt_df_add_data(frt_df_new(author),
71
+ frt_estrdup("V. S. Naipaul")))->destroy_data = true;
72
+ frt_doc_add_field(docs[2], frt_df_add_data(frt_df_new(title),
73
+ frt_estrdup("In a Free State")))->destroy_data = true;
74
+ frt_doc_add_field(docs[2], frt_df_add_data(frt_df_new(year),
75
+ frt_estrdup("1971")))->destroy_data = true;
76
+ docs[3] = frt_doc_new();
77
+ frt_doc_add_field(docs[3], frt_df_add_data(frt_df_new(author),
78
+ frt_estrdup("John Berger")))->destroy_data = true;
79
+ frt_doc_add_field(docs[3], frt_df_add_data(frt_df_new(title),
80
+ frt_estrdup("G")))->destroy_data = true;
81
+ frt_doc_add_field(docs[3], frt_df_add_data(frt_df_new(year),
82
+ frt_estrdup("1972")))->destroy_data = true;
83
+ docs[4] = frt_doc_new();
84
+ frt_doc_add_field(docs[4], frt_df_add_data(frt_df_new(author),
85
+ frt_estrdup("J. G. Farrell")))->destroy_data = true;
86
+ frt_doc_add_field(docs[4], frt_df_add_data(frt_df_new(title),
87
+ frt_estrdup("The Siege of Krishnapur")))->destroy_data = true;
88
+ frt_doc_add_field(docs[4], frt_df_add_data(frt_df_new(year),
89
+ frt_estrdup("1973")))->destroy_data = true;
90
+ docs[5] = frt_doc_new();
91
+ frt_doc_add_field(docs[5], frt_df_add_data(frt_df_new(author),
92
+ frt_estrdup("Stanley Middleton")))->destroy_data = true;
93
+ frt_doc_add_field(docs[5], frt_df_add_data(frt_df_new(title),
94
+ frt_estrdup("Holiday")))->destroy_data = true;
95
+ frt_doc_add_field(docs[5], frt_df_add_data(frt_df_new(year),
96
+ frt_estrdup("1974")))->destroy_data = true;
97
+ docs[6] = frt_doc_new();
98
+ frt_doc_add_field(docs[6], frt_df_add_data(frt_df_new(author),
99
+ frt_estrdup("Nadine Gordimer")))->destroy_data = true;
100
+ frt_doc_add_field(docs[6], frt_df_add_data(frt_df_new(title),
101
+ frt_estrdup("The Conservationist")))->destroy_data = true;
102
+ frt_doc_add_field(docs[6], frt_df_add_data(frt_df_new(year),
103
+ frt_estrdup("1974")))->destroy_data = true;
104
+ docs[7] = frt_doc_new();
105
+ frt_doc_add_field(docs[7], frt_df_add_data(frt_df_new(author),
106
+ frt_estrdup("Ruth Prawer Jhabvala")))->destroy_data = true;
107
+ frt_doc_add_field(docs[7], frt_df_add_data(frt_df_new(title),
108
+ frt_estrdup("Heat and Dust")))->destroy_data = true;
109
+ frt_doc_add_field(docs[7], frt_df_add_data(frt_df_new(year),
110
+ frt_estrdup("1975")))->destroy_data = true;
111
+ docs[8] = frt_doc_new();
112
+ frt_doc_add_field(docs[8], frt_df_add_data(frt_df_new(author),
113
+ frt_estrdup("David Storey")))->destroy_data = true;
114
+ frt_doc_add_field(docs[8], frt_df_add_data(frt_df_new(title),
115
+ frt_estrdup("Saville")))->destroy_data = true;
116
+ frt_doc_add_field(docs[8], frt_df_add_data(frt_df_new(year),
117
+ frt_estrdup("1976")))->destroy_data = true;
118
+ docs[9] = frt_doc_new();
119
+ frt_doc_add_field(docs[9], frt_df_add_data(frt_df_new(author),
120
+ frt_estrdup("Paul Scott")))->destroy_data = true;
121
+ frt_doc_add_field(docs[9], frt_df_add_data(frt_df_new(title),
122
+ frt_estrdup("Staying On")))->destroy_data = true;
123
+ frt_doc_add_field(docs[9], frt_df_add_data(frt_df_new(year),
124
+ frt_estrdup("1977")))->destroy_data = true;
125
+ docs[10] = frt_doc_new();
126
+ frt_doc_add_field(docs[10], frt_df_add_data(frt_df_new(author),
127
+ frt_estrdup("Iris Murdoch")))->destroy_data = true;
128
+ frt_doc_add_field(docs[10], frt_df_add_data(frt_df_new(title),
129
+ frt_estrdup("The Sea")))->destroy_data = true;
130
+ frt_doc_add_field(docs[10], frt_df_add_data(frt_df_new(year),
131
+ frt_estrdup("1978")))->destroy_data = true;
132
+ docs[11] = frt_doc_new();
133
+ frt_doc_add_field(docs[11], frt_df_add_data(frt_df_new(author),
134
+ frt_estrdup("Penelope Fitzgerald")))->destroy_data = true;
135
+ frt_doc_add_field(docs[11], frt_df_add_data(frt_df_new(title),
136
+ frt_estrdup("Offshore")))->destroy_data = true;
137
+ frt_doc_add_field(docs[11], frt_df_add_data(frt_df_new(year),
138
+ frt_estrdup("1979")))->destroy_data = true;
139
+ docs[12] = frt_doc_new();
140
+ frt_doc_add_field(docs[12], frt_df_add_data(frt_df_new(author),
141
+ frt_estrdup("William Golding")))->destroy_data = true;
142
+ frt_doc_add_field(docs[12], frt_df_add_data(frt_df_new(title),
143
+ frt_estrdup("Rites of Passage")))->destroy_data = true;
144
+ frt_doc_add_field(docs[12], frt_df_add_data(frt_df_new(year),
145
+ frt_estrdup("1980")))->destroy_data = true;
146
+ docs[13] = frt_doc_new();
147
+ frt_doc_add_field(docs[13], frt_df_add_data(frt_df_new(author),
148
+ frt_estrdup("Salman Rushdie")))->destroy_data = true;
149
+ frt_doc_add_field(docs[13], frt_df_add_data(frt_df_new(title),
150
+ frt_estrdup("Midnight's Children")))->destroy_data = true;
151
+ frt_doc_add_field(docs[13], frt_df_add_data(frt_df_new(year),
152
+ frt_estrdup("1981")))->destroy_data = true;
153
+ docs[14] = frt_doc_new();
154
+ frt_doc_add_field(docs[14], frt_df_add_data(frt_df_new(author),
155
+ frt_estrdup("Thomas Keneally")))->destroy_data = true;
156
+ frt_doc_add_field(docs[14], frt_df_add_data(frt_df_new(title),
157
+ frt_estrdup("Schindler's Ark")))->destroy_data = true;
158
+ frt_doc_add_field(docs[14], frt_df_add_data(frt_df_new(year),
159
+ frt_estrdup("1982")))->destroy_data = true;
160
+ docs[15] = frt_doc_new();
161
+ frt_doc_add_field(docs[15], frt_df_add_data(frt_df_new(author),
162
+ frt_estrdup("J. M. Coetzee")))->destroy_data = true;
163
+ frt_doc_add_field(docs[15], frt_df_add_data(frt_df_new(title),
164
+ frt_estrdup("Life and Times of Michael K")))->destroy_data = true;
165
+ frt_doc_add_field(docs[15], frt_df_add_data(frt_df_new(year),
166
+ frt_estrdup("1983")))->destroy_data = true;
167
+ docs[16] = frt_doc_new();
168
+ frt_doc_add_field(docs[16], frt_df_add_data(frt_df_new(author),
169
+ frt_estrdup("Anita Brookner")))->destroy_data = true;
170
+ frt_doc_add_field(docs[16], frt_df_add_data(frt_df_new(title),
171
+ frt_estrdup("Hotel du Lac")))->destroy_data = true;
172
+ frt_doc_add_field(docs[16], frt_df_add_data(frt_df_new(year),
173
+ frt_estrdup("1984")))->destroy_data = true;
174
+ docs[17] = frt_doc_new();
175
+ frt_doc_add_field(docs[17], frt_df_add_data(frt_df_new(author),
176
+ frt_estrdup("Keri Hulme")))->destroy_data = true;
177
+ frt_doc_add_field(docs[17], frt_df_add_data(frt_df_new(title),
178
+ frt_estrdup("The Bone People")))->destroy_data = true;
179
+ frt_doc_add_field(docs[17], frt_df_add_data(frt_df_new(year),
180
+ frt_estrdup("1985")))->destroy_data = true;
181
+ docs[18] = frt_doc_new();
182
+ frt_doc_add_field(docs[18], frt_df_add_data(frt_df_new(author),
183
+ frt_estrdup("Kingsley Amis")))->destroy_data = true;
184
+ frt_doc_add_field(docs[18], frt_df_add_data(frt_df_new(title),
185
+ frt_estrdup("The Old Devils")))->destroy_data = true;
186
+ frt_doc_add_field(docs[18], frt_df_add_data(frt_df_new(year),
187
+ frt_estrdup("1986")))->destroy_data = true;
188
+ docs[19] = frt_doc_new();
189
+ frt_doc_add_field(docs[19], frt_df_add_data(frt_df_new(author),
190
+ frt_estrdup("Penelope Lively")))->destroy_data = true;
191
+ frt_doc_add_field(docs[19], frt_df_add_data(frt_df_new(title),
192
+ frt_estrdup("Moon Tiger")))->destroy_data = true;
193
+ frt_doc_add_field(docs[19], frt_df_add_data(frt_df_new(year),
194
+ frt_estrdup("1987")))->destroy_data = true;
195
+ docs[20] = frt_doc_new();
196
+ frt_doc_add_field(docs[20], frt_df_add_data(frt_df_new(author),
197
+ frt_estrdup("Peter Carey")))->destroy_data = true;
198
+ frt_doc_add_field(docs[20], frt_df_add_data(frt_df_new(title),
199
+ frt_estrdup("Oscar and Lucinda")))->destroy_data = true;
200
+ frt_doc_add_field(docs[20], frt_df_add_data(frt_df_new(year),
201
+ frt_estrdup("1988")))->destroy_data = true;
202
+ docs[21] = frt_doc_new();
203
+ frt_doc_add_field(docs[21], frt_df_add_data(frt_df_new(author),
204
+ frt_estrdup("Kazuo Ishiguro")))->destroy_data = true;
205
+ frt_doc_add_field(docs[21], frt_df_add_data(frt_df_new(title),
206
+ frt_estrdup("The Remains of the Day")))->destroy_data = true;
207
+ frt_doc_add_field(docs[21], frt_df_add_data(frt_df_new(year),
208
+ frt_estrdup("1989")))->destroy_data = true;
209
+ docs[22] = frt_doc_new();
210
+ frt_doc_add_field(docs[22], frt_df_add_data(frt_df_new(author),
211
+ frt_estrdup("A. S. Byatt")))->destroy_data = true;
212
+ frt_doc_add_field(docs[22], frt_df_add_data(frt_df_new(title),
213
+ frt_estrdup("Possession")))->destroy_data = true;
214
+ frt_doc_add_field(docs[22], frt_df_add_data(frt_df_new(year),
215
+ frt_estrdup("1990")))->destroy_data = true;
216
+ docs[23] = frt_doc_new();
217
+ frt_doc_add_field(docs[23], frt_df_add_data(frt_df_new(author),
218
+ frt_estrdup("Ben Okri")))->destroy_data = true;
219
+ frt_doc_add_field(docs[23], frt_df_add_data(frt_df_new(title),
220
+ frt_estrdup("The Famished Road")))->destroy_data = true;
221
+ frt_doc_add_field(docs[23], frt_df_add_data(frt_df_new(year),
222
+ frt_estrdup("1991")))->destroy_data = true;
223
+ docs[24] = frt_doc_new();
224
+ frt_doc_add_field(docs[24], frt_df_add_data(frt_df_new(author),
225
+ frt_estrdup("Michael Ondaatje")))->destroy_data = true;
226
+ frt_doc_add_field(docs[24], frt_df_add_data(frt_df_new(title),
227
+ frt_estrdup("The English Patient")))->destroy_data = true;
228
+ frt_doc_add_field(docs[24], frt_df_add_data(frt_df_new(year),
229
+ frt_estrdup("1992")))->destroy_data = true;
230
+ docs[25] = frt_doc_new();
231
+ frt_doc_add_field(docs[25], frt_df_add_data(frt_df_new(author),
232
+ frt_estrdup("Barry Unsworth")))->destroy_data = true;
233
+ frt_doc_add_field(docs[25], frt_df_add_data(frt_df_new(title),
234
+ frt_estrdup("Sacred Hunger")))->destroy_data = true;
235
+ frt_doc_add_field(docs[25], frt_df_add_data(frt_df_new(year),
236
+ frt_estrdup("1992")))->destroy_data = true;
237
+ docs[26] = frt_doc_new();
238
+ frt_doc_add_field(docs[26], frt_df_add_data(frt_df_new(author),
239
+ frt_estrdup("Roddy Doyle")))->destroy_data = true;
240
+ frt_doc_add_field(docs[26], frt_df_add_data(frt_df_new(title),
241
+ frt_estrdup("Paddy Clarke Ha Ha Ha")))->destroy_data = true;
242
+ frt_doc_add_field(docs[26], frt_df_add_data(frt_df_new(year),
243
+ frt_estrdup("1993")))->destroy_data = true;
244
+ docs[27] = frt_doc_new();
245
+ frt_doc_add_field(docs[27], frt_df_add_data(frt_df_new(author),
246
+ frt_estrdup("James Kelman")))->destroy_data = true;
247
+ frt_doc_add_field(docs[27], frt_df_add_data(frt_df_new(title),
248
+ frt_estrdup("How Late It Was, How Late")))->destroy_data = true;
249
+ frt_doc_add_field(docs[27], frt_df_add_data(frt_df_new(year),
250
+ frt_estrdup("1994")))->destroy_data = true;
251
+ docs[28] = frt_doc_new();
252
+ frt_doc_add_field(docs[28], frt_df_add_data(frt_df_new(author),
253
+ frt_estrdup("Pat Barker")))->destroy_data = true;
254
+ frt_doc_add_field(docs[28], frt_df_add_data(frt_df_new(title),
255
+ frt_estrdup("The Ghost Road")))->destroy_data = true;
256
+ frt_doc_add_field(docs[28], frt_df_add_data(frt_df_new(year),
257
+ frt_estrdup("1995")))->destroy_data = true;
258
+ docs[29] = frt_doc_new();
259
+ frt_doc_add_field(docs[29], frt_df_add_data(frt_df_new(author),
260
+ frt_estrdup("Graham Swift")))->destroy_data = true;
261
+ frt_doc_add_field(docs[29], frt_df_add_data(frt_df_new(title),
262
+ frt_estrdup("Last Orders")))->destroy_data = true;
263
+ frt_doc_add_field(docs[29], frt_df_add_data(frt_df_new(year),
264
+ frt_estrdup("1996")))->destroy_data = true;
265
+ docs[30] = frt_doc_new();
266
+ frt_doc_add_field(docs[30], frt_df_add_data(frt_df_new(author),
267
+ frt_estrdup("Arundati Roy")))->destroy_data = true;
268
+ frt_doc_add_field(docs[30], frt_df_add_data(frt_df_new(title),
269
+ frt_estrdup("The God of Small Things")))->destroy_data = true;
270
+ frt_doc_add_field(docs[30], frt_df_add_data(frt_df_new(year),
271
+ frt_estrdup("1997")))->destroy_data = true;
272
+ docs[31] = frt_doc_new();
273
+ frt_doc_add_field(docs[31], frt_df_add_data(frt_df_new(author),
274
+ frt_estrdup("Ian McEwan")))->destroy_data = true;
275
+ frt_doc_add_field(docs[31], frt_df_add_data(frt_df_new(title),
276
+ frt_estrdup("Amsterdam")))->destroy_data = true;
277
+ frt_doc_add_field(docs[31], frt_df_add_data(frt_df_new(year),
278
+ frt_estrdup("1998")))->destroy_data = true;
279
+ docs[32] = frt_doc_new();
280
+ frt_doc_add_field(docs[32], frt_df_add_data(frt_df_new(author),
281
+ frt_estrdup("J. M. Coetzee")))->destroy_data = true;
282
+ frt_doc_add_field(docs[32], frt_df_add_data(frt_df_new(title),
283
+ frt_estrdup("Disgrace")))->destroy_data = true;
284
+ frt_doc_add_field(docs[32], frt_df_add_data(frt_df_new(year),
285
+ frt_estrdup("1999")))->destroy_data = true;
286
+ docs[33] = frt_doc_new();
287
+ frt_doc_add_field(docs[33], frt_df_add_data(frt_df_new(author),
288
+ frt_estrdup("Margaret Atwood")))->destroy_data = true;
289
+ frt_doc_add_field(docs[33], frt_df_add_data(frt_df_new(title),
290
+ frt_estrdup("The Blind Assassin")))->destroy_data = true;
291
+ frt_doc_add_field(docs[33], frt_df_add_data(frt_df_new(year),
292
+ frt_estrdup("2000")))->destroy_data = true;
293
+ docs[34] = frt_doc_new();
294
+ frt_doc_add_field(docs[34], frt_df_add_data(frt_df_new(author),
295
+ frt_estrdup("Peter Carey")))->destroy_data = true;
296
+ frt_doc_add_field(docs[34], frt_df_add_data(frt_df_new(title),
297
+ frt_estrdup("True History of the Kelly Gang")))->destroy_data = true;
298
+ frt_doc_add_field(docs[34], frt_df_add_data(frt_df_new(year),
299
+ frt_estrdup("2001")))->destroy_data = true;
300
+ docs[35] = frt_doc_new();
301
+ frt_doc_add_field(docs[35], frt_df_add_data(frt_df_new(author),
302
+ frt_estrdup("Yann Martel")))->destroy_data = true;
303
+ frt_doc_add_field(docs[35], frt_df_add_data(frt_df_new(title),
304
+ frt_estrdup("The Life of Pi")))->destroy_data = true;
305
+ frt_doc_add_field(docs[35], frt_df_add_data(frt_df_new(year),
306
+ frt_estrdup("2002")))->destroy_data = true;
307
+ docs[36] = frt_doc_new();
308
+ frt_doc_add_field(docs[36], frt_df_add_data(frt_df_new(author),
309
+ frt_estrdup("DBC Pierre")))->destroy_data = true;
310
+ frt_doc_add_field(docs[36], frt_df_add_data(frt_df_new(title),
311
+ frt_estrdup("Vernon God Little")))->destroy_data = true;
312
+ frt_doc_add_field(docs[36], frt_df_add_data(frt_df_new(year),
313
+ frt_estrdup("2003")))->destroy_data = true;
314
+
315
+ return docs;
316
+ }
317
+
318
+ static void add_document_with_fields(FrtIndexWriter *iw, int i)
319
+ {
320
+ FrtDocument **docs = prep_book_list();
321
+ frt_iw_add_doc(iw, docs[i]);
322
+ destroy_docs(docs, BOOK_LIST_LENGTH);
323
+ }
324
+
325
+ static FrtIndexWriter *create_book_iw_conf(FrtStore *store, const FrtConfig *config)
326
+ {
327
+ FrtFieldInfos *fis = prep_book_fis();
328
+ frt_index_create(store, fis);
329
+ frt_fis_deref(fis);
330
+ return frt_iw_open(store, frt_whitespace_analyzer_new(false), config);
331
+ }
332
+
333
+ static FrtIndexWriter *create_book_iw(FrtStore *store)
334
+ {
335
+ return create_book_iw_conf(store, &frt_default_config);
336
+ }
337
+
338
+ #define IR_TEST_DOC_CNT 256
339
+
340
+ FrtDocument **prep_ir_test_docs()
341
+ {
342
+ int i;
343
+ char buf[2000] = "";
344
+ FrtDocument **docs = FRT_ALLOC_N(FrtDocument *, IR_TEST_DOC_CNT);
345
+ FrtDocField *df;
346
+
347
+ docs[0] = frt_doc_new();
348
+ frt_doc_add_field(docs[0], frt_df_add_data(frt_df_new(changing_field),
349
+ frt_estrdup("word3 word4 word1 word2 word1 word3 word4 word1 "
350
+ "word3 word3")))->destroy_data = true;
351
+ frt_doc_add_field(docs[0], frt_df_add_data(frt_df_new(compressed_field),
352
+ frt_estrdup("word3 word4 word1 word2 word1 word3 word4 word1 "
353
+ "word3 word3")))->destroy_data = true;
354
+ frt_doc_add_field(docs[0], frt_df_add_data(frt_df_new(body),
355
+ frt_estrdup("Where is Wally")))->destroy_data = true;
356
+ docs[1] = frt_doc_new();
357
+ frt_doc_add_field(docs[1], frt_df_add_data(frt_df_new(body),
358
+ frt_estrdup("Some Random Sentence read")))->destroy_data = true;
359
+ frt_doc_add_field(docs[1], frt_df_add_data(frt_df_new(tag),
360
+ frt_estrdup("id_test")))->destroy_data = true;
361
+ docs[2] = frt_doc_new();
362
+ frt_doc_add_field(docs[2], frt_df_add_data(frt_df_new(body),
363
+ frt_estrdup("Some read Random Sentence read")))->destroy_data = true;
364
+ df = frt_df_new(tag);
365
+ frt_df_add_data(df, frt_estrdup("one"));
366
+ frt_df_add_data(df, frt_estrdup("two"));
367
+ frt_df_add_data(df, frt_estrdup("three"));
368
+ frt_df_add_data(df, frt_estrdup("four"));
369
+ frt_doc_add_field(docs[2], df)->destroy_data = true;
370
+ df = frt_df_new(compressed_field);
371
+ frt_df_add_data(df, frt_estrdup("one"));
372
+ frt_df_add_data(df, frt_estrdup("two"));
373
+ frt_df_add_data(df, frt_estrdup("three"));
374
+ frt_df_add_data(df, frt_estrdup("four"));
375
+ frt_doc_add_field(docs[2], df)->destroy_data = true;
376
+ docs[3] = frt_doc_new();
377
+ frt_doc_add_field(docs[3], frt_df_add_data(frt_df_new(title),
378
+ frt_estrdup("War And Peace")))->destroy_data = true;
379
+ frt_doc_add_field(docs[3], frt_df_add_data(frt_df_new(body),
380
+ frt_estrdup("word3 word4 word1 word2 word1 "
381
+ "word3 word4 word1 word3 word3")))->destroy_data = true;
382
+ frt_doc_add_field(docs[3], frt_df_add_data(frt_df_new(author),
383
+ frt_estrdup("Leo Tolstoy")))->destroy_data = true;
384
+ frt_doc_add_field(docs[3], frt_df_add_data(frt_df_new(year),
385
+ frt_estrdup("1865")))->destroy_data = true;
386
+ frt_doc_add_field(docs[3], frt_df_add_data(frt_df_new(text),
387
+ frt_estrdup("more text which is not stored")))->destroy_data = true;
388
+ docs[4] = frt_doc_new();
389
+ frt_doc_add_field(docs[4], frt_df_add_data(frt_df_new(body),
390
+ frt_estrdup("Some Random Sentence")))->destroy_data = true;
391
+ docs[5] = frt_doc_new();
392
+ frt_doc_add_field(docs[5], frt_df_add_data(frt_df_new(body),
393
+ frt_estrdup("Here's Wally")))->destroy_data = true;
394
+ frt_doc_add_field(docs[5], frt_df_add_data(frt_df_new(text),
395
+ frt_estrdup("so_that_norm_can_be_set")))->destroy_data = true;
396
+ docs[6] = frt_doc_new();
397
+ frt_doc_add_field(docs[6], frt_df_add_data(frt_df_new(body),
398
+ frt_estrdup("Some Random Sentence read read read read"
399
+ )))->destroy_data = true;
400
+ docs[7] = frt_doc_new();
401
+ frt_doc_add_field(docs[7], frt_df_add_data(frt_df_new(body),
402
+ frt_estrdup("Some Random Sentence")))->destroy_data = true;
403
+ docs[8] = frt_doc_new();
404
+ frt_doc_add_field(docs[8], frt_df_add_data(frt_df_new(body),
405
+ frt_estrdup("Some Random Sentence")))->destroy_data = true;
406
+ docs[9] = frt_doc_new();
407
+ frt_doc_add_field(docs[9], frt_df_add_data(frt_df_new(body),
408
+ frt_estrdup("read Some Random Sentence read this will be used after "
409
+ "unfinished next position read")))->destroy_data = true;
410
+ docs[10] = frt_doc_new();
411
+ frt_doc_add_field(docs[10], frt_df_add_data(frt_df_new(body),
412
+ frt_estrdup("Some read Random Sentence")))->destroy_data = true;
413
+ frt_doc_add_field(docs[10], frt_df_add_data(frt_df_new(changing_field),
414
+ frt_estrdup("word3 word4 word1 word2 word1 word3 word4 word1 word3 "
415
+ "word3")))->destroy_data = true;
416
+ docs[11] = frt_doc_new();
417
+ frt_doc_add_field(docs[11], frt_df_add_data(frt_df_new(body),
418
+ frt_estrdup("And here too. Well, maybe Not")))->destroy_data = true;
419
+ docs[12] = frt_doc_new();
420
+ frt_doc_add_field(docs[12], frt_df_add_data(frt_df_new(title),
421
+ frt_estrdup("Shawshank Redemption")))->destroy_data = true;
422
+ frt_doc_add_field(docs[12], frt_df_add_data(frt_df_new(body),
423
+ frt_estrdup("Some Random Sentence")))->destroy_data = true;
424
+ docs[13] = frt_doc_new();
425
+ frt_doc_add_field(docs[13], frt_df_add_data(frt_df_new(body),
426
+ frt_estrdup("Some Random Sentence")))->destroy_data = true;
427
+ docs[14] = frt_doc_new();
428
+ frt_doc_add_field(docs[14], frt_df_add_data(frt_df_new(body),
429
+ frt_estrdup("Some Random Sentence")))->destroy_data = true;
430
+ docs[15] = frt_doc_new();
431
+ frt_doc_add_field(docs[15], frt_df_add_data(frt_df_new(body),
432
+ frt_estrdup("Some read Random Sentence")))->destroy_data = true;
433
+ docs[16] = frt_doc_new();
434
+ frt_doc_add_field(docs[16], frt_df_add_data(frt_df_new(body),
435
+ frt_estrdup("Some Random read read Sentence")))->destroy_data = true;
436
+ docs[17] = frt_doc_new();
437
+ frt_doc_add_field(docs[17], frt_df_add_data(frt_df_new(body),
438
+ frt_estrdup("Some Random read Sentence")))->destroy_data = true;
439
+ frt_doc_add_field(docs[17], frt_df_add_data(frt_df_new(changing_field),
440
+ frt_estrdup("word3 word4 word1 word2 word1 word3 word4 word1 word3 "
441
+ "word3")))->destroy_data = true;
442
+ docs[18] = frt_doc_new();
443
+ frt_doc_add_field(docs[18], frt_df_add_data(frt_df_new(body),
444
+ frt_estrdup("Wally Wally Wally")))->destroy_data = true;
445
+ docs[19] = frt_doc_new();
446
+ frt_doc_add_field(docs[19], frt_df_add_data(frt_df_new(body),
447
+ frt_estrdup("Some Random Sentence")))->destroy_data = true;
448
+ frt_doc_add_field(docs[19], frt_df_add_data(frt_df_new(changing_field),
449
+ frt_estrdup("word3 word4 word1 word2 word1 word3 word4 word1 word3 "
450
+ "word3")))->destroy_data = true;
451
+ docs[20] = frt_doc_new();
452
+ frt_doc_add_field(docs[20], frt_df_add_data(frt_df_new(body),
453
+ frt_estrdup("Wally is where Wally usually likes to go. Wally Mart! "
454
+ "Wally likes shopping there for Where's Wally books. "
455
+ "Wally likes to read")))->destroy_data = true;
456
+ frt_doc_add_field(docs[20], frt_df_add_data(frt_df_new(changing_field),
457
+ frt_estrdup("word3 word4 word1 word2 word1 word3 word4 word1 word3 "
458
+ "word3")))->destroy_data = true;
459
+ docs[21] = frt_doc_new();
460
+ frt_doc_add_field(docs[21], frt_df_add_data(frt_df_new(body),
461
+ frt_estrdup("Some Random Sentence read read read and more read read "
462
+ "read")))->destroy_data = true;
463
+ frt_doc_add_field(docs[21], frt_df_add_data(frt_df_new(changing_field),
464
+ frt_estrdup("word3 word4 word1 word2 word1 word3 word4 word1 word3 "
465
+ "word3")))->destroy_data = true;
466
+ frt_doc_add_field(docs[21], frt_df_add_data(frt_df_new(rb_intern("new field")),
467
+ frt_estrdup("zdata znot zto zbe zfound")))->destroy_data = true;
468
+ frt_doc_add_field(docs[21], frt_df_add_data(frt_df_new(title),
469
+ frt_estrdup("title_too_long_for_max_word_lengthxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")))->destroy_data = true;
470
+
471
+ for (i = 1; i < 22; i++) {
472
+ strcat(buf, "skip ");
473
+ }
474
+ for (i = 22; i < IR_TEST_DOC_CNT; i++) {
475
+ strcat(buf, "skip ");
476
+ docs[i] = frt_doc_new();
477
+ frt_doc_add_field(docs[i], frt_df_add_data(frt_df_new(text),
478
+ frt_estrdup(buf)))->destroy_data = true;
479
+ }
480
+ return docs;
481
+ }
482
+
483
+ #define NUM_STDE_TEST_DOCS 50
484
+ #define MAX_TEST_WORDS 1000
485
+
486
+ static void prep_stde_test_docs(FrtDocument **docs, int doc_cnt, int num_words,
487
+ FrtFieldInfos *fis)
488
+ {
489
+ int i, j;
490
+ char *buf = FRT_ALLOC_N(char, num_words * (TEST_WORD_LIST_MAX_LEN + 1));
491
+ for (i = 0; i < doc_cnt; i++) {
492
+ docs[i] = frt_doc_new();
493
+ for (j = 0; j < fis->size; j++) {
494
+ if ((rand() % 2) == 0) {
495
+ FrtDocField *df = frt_df_new(fis->fields[j]->name);
496
+ frt_df_add_data(df, frt_estrdup(make_random_string(buf, num_words)));
497
+ df->destroy_data = true;
498
+ frt_doc_add_field(docs[i], df);
499
+ }
500
+ }
501
+ }
502
+ free(buf);
503
+ }
504
+
505
+ static void prep_test_1seg_index(FrtStore *store, FrtDocument **docs,
506
+ int doc_cnt, FrtFieldInfos *fis)
507
+ {
508
+ int i;
509
+ FrtDocWriter *dw;
510
+ FrtIndexWriter *iw;
511
+ FrtSegmentInfo *si = frt_si_new(frt_estrdup("_0"), doc_cnt, store);
512
+
513
+ frt_index_create(store, fis);
514
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), NULL);
515
+
516
+ dw = frt_dw_open(iw, si);
517
+
518
+ for (i = 0; i < doc_cnt; i++) {
519
+ frt_dw_add_doc(dw, docs[i]);
520
+ }
521
+
522
+ frt_dw_close(dw);
523
+ frt_iw_close(iw);
524
+ frt_si_deref(si);
525
+ }
526
+
527
+ /****************************************************************************
528
+ *
529
+ * TermDocEnum
530
+ *
531
+ ****************************************************************************/
532
+
533
+ static void test_segment_term_doc_enum(TestCase *tc, void *data)
534
+ {
535
+ int i, j;
536
+ FrtStore *store = (FrtStore *)data;
537
+ FrtFieldInfos *fis = prep_all_fis();
538
+ FrtFieldInfo *fi;
539
+ FrtSegmentFieldIndex *sfi;
540
+ FrtTermInfosReader *tir;
541
+ int skip_interval;
542
+ FrtInStream *frq_in, *prx_in;
543
+ FrtBitVector *bv = NULL;
544
+ FrtTermDocEnum *tde, *tde_reader, *tde_skip_to;
545
+ char buf[TEST_WORD_LIST_MAX_LEN + 1];
546
+ FrtDocField *df;
547
+ FrtDocument *docs[NUM_STDE_TEST_DOCS], *doc;
548
+
549
+ prep_stde_test_docs(docs, NUM_STDE_TEST_DOCS, MAX_TEST_WORDS, fis);
550
+ prep_test_1seg_index(store, docs, NUM_STDE_TEST_DOCS, fis);
551
+
552
+ sfi = frt_sfi_open(store, "_0");
553
+ tir = frt_tir_open(store, sfi, "_0");
554
+ skip_interval = ((FrtSegmentTermEnum *)tir->orig_te)->skip_interval;
555
+ frq_in = store->open_input(store, "_0.frq");
556
+ prx_in = store->open_input(store, "_0.prx");
557
+ tde = frt_stde_new(tir, frq_in, bv, skip_interval);
558
+ tde_reader = frt_stde_new(tir, frq_in, bv, skip_interval);
559
+ tde_skip_to = frt_stde_new(tir, frq_in, bv, skip_interval);
560
+
561
+ fi = frt_fis_get_field(fis, rb_intern("tv"));
562
+ for (i = 0; i < 300; i++) {
563
+ int cnt = 0, ind = 0, doc_nums[3], freqs[3];
564
+ const char *word = test_word_list[rand()%TEST_WORD_LIST_SIZE];
565
+ tde->seek(tde, fi->number, word);
566
+ tde_reader->seek(tde_reader, fi->number, word);
567
+ while (tde->next(tde)) {
568
+ if (cnt == ind) {
569
+ cnt = tde_reader->read(tde_reader, doc_nums, freqs, 3);
570
+ ind = 0;
571
+ }
572
+ Aiequal(doc_nums[ind], tde->doc_num(tde));
573
+ Aiequal(freqs[ind], tde->freq(tde));
574
+ ind++;
575
+
576
+ doc = docs[tde->doc_num(tde)];
577
+ df = frt_doc_get_field(doc, fi->name);
578
+ if (Apnotnull(df)) {
579
+ Assert(strstr((char *)df->data[0], word) != NULL,
580
+ "%s not found in doc[%d]\n\"\"\"\n%s\n\"\"\"\n",
581
+ word, tde->doc_num(tde), df->data[0]);
582
+ }
583
+ tde_skip_to->seek(tde_skip_to, fi->number, word);
584
+ Atrue(tde_skip_to->skip_to(tde_skip_to, tde->doc_num(tde)));
585
+ Aiequal(tde->doc_num(tde), tde_skip_to->doc_num(tde_skip_to));
586
+ Aiequal(tde->freq(tde), tde_skip_to->freq(tde_skip_to));
587
+ }
588
+ Aiequal(ind, cnt);
589
+
590
+ Atrue(! tde->next(tde));
591
+ Atrue(! tde->next(tde));
592
+ Atrue(! tde->skip_to(tde, 0));
593
+ Atrue(! tde->skip_to(tde, 1000000));
594
+ }
595
+ tde->close(tde);
596
+ tde_reader->close(tde_reader);
597
+ tde_skip_to->close(tde_skip_to);
598
+
599
+
600
+ tde = frt_stpe_new(tir, frq_in, prx_in, bv, skip_interval);
601
+ tde_skip_to = frt_stpe_new(tir, frq_in, prx_in, bv, skip_interval);
602
+
603
+ fi = frt_fis_get_field(fis, rb_intern("tv+offsets"));
604
+ for (i = 0; i < 200; i++) {
605
+ const char *word = test_word_list[rand()%TEST_WORD_LIST_SIZE];
606
+ tde->seek(tde, fi->number, word);
607
+ while (tde->next(tde)) {
608
+ tde_skip_to->seek(tde_skip_to, fi->number, word);
609
+ Atrue(tde_skip_to->skip_to(tde_skip_to, tde->doc_num(tde)));
610
+ Aiequal(tde->doc_num(tde), tde_skip_to->doc_num(tde_skip_to));
611
+ Aiequal(tde->freq(tde), tde_skip_to->freq(tde_skip_to));
612
+
613
+ doc = docs[tde->doc_num(tde)];
614
+ df = frt_doc_get_field(doc, fi->name);
615
+ if (Apnotnull(df)) {
616
+ Assert(strstr((char *)df->data[0], word) != NULL,
617
+ "%s not found in doc[%d]\n\"\"\"\n%s\n\"\"\"\n",
618
+ word, tde->doc_num(tde), df->data[0]);
619
+ for (j = tde->freq(tde); j > 0; j--) {
620
+ int pos = tde->next_position(tde), t;
621
+ Aiequal(pos, tde_skip_to->next_position(tde_skip_to));
622
+ Asequal(word, get_nth_word(df->data[0], buf, pos, &t, &t));
623
+ }
624
+ }
625
+ }
626
+ Atrue(! tde->next(tde));
627
+ Atrue(! tde->next(tde));
628
+ Atrue(! tde->skip_to(tde, 0));
629
+ Atrue(! tde->skip_to(tde, 1000000));
630
+
631
+ }
632
+ tde->close(tde);
633
+ tde_skip_to->close(tde_skip_to);
634
+
635
+ for (i = 0; i < NUM_STDE_TEST_DOCS; i++) {
636
+ frt_doc_destroy(docs[i]);
637
+ }
638
+ frt_fis_deref(fis);
639
+ frt_is_close(frq_in);
640
+ frt_is_close(prx_in);
641
+ frt_tir_close(tir);
642
+ frt_sfi_close(sfi);
643
+ }
644
+
645
+ const char *double_word = "word word";
646
+ const char *triple_word = "word word word";
647
+
648
+ static void test_segment_tde_deleted_docs(TestCase *tc, void *data)
649
+ {
650
+ int i, doc_num_expected, skip_interval;
651
+ FrtStore *store = (FrtStore *)data;
652
+ FrtDocWriter *dw;
653
+ FrtDocument *doc;
654
+ FrtIndexWriter *iw = create_book_iw(store);
655
+ FrtSegmentFieldIndex *sfi;
656
+ FrtTermInfosReader *tir;
657
+ FrtInStream *frq_in, *prx_in;
658
+ FrtBitVector *bv = frt_bv_new();
659
+ FrtTermDocEnum *tde;
660
+ FrtSegmentInfo *si = frt_si_new(frt_estrdup("_0"), NUM_STDE_TEST_DOCS, store);
661
+
662
+ dw = frt_dw_open(iw, si);
663
+
664
+ for (i = 0; i < NUM_STDE_TEST_DOCS; i++) {
665
+ doc = frt_doc_new();
666
+ if ((rand() % 2) == 0) {
667
+ frt_bv_set(bv, i);
668
+ Aiequal(1, frt_bv_get(bv, i));
669
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(rb_intern("f")), (char *)double_word));
670
+ }
671
+ else {
672
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(rb_intern("f")), (char *)triple_word));
673
+ }
674
+ frt_dw_add_doc(dw, doc);
675
+ frt_doc_destroy(doc);
676
+ }
677
+ Aiequal(NUM_STDE_TEST_DOCS, dw->doc_num);
678
+ frt_dw_close(dw);
679
+ frt_iw_close(iw);
680
+
681
+ sfi = frt_sfi_open(store, "_0");
682
+ tir = frt_tir_open(store, sfi, "_0");
683
+ frq_in = store->open_input(store, "_0.frq");
684
+ prx_in = store->open_input(store, "_0.prx");
685
+ skip_interval = sfi->skip_interval;
686
+ tde = frt_stpe_new(tir, frq_in, prx_in, bv, skip_interval);
687
+
688
+ tde->seek(tde, 0, "word");
689
+ doc_num_expected = 0;
690
+ while (tde->next(tde)) {
691
+ while (frt_bv_get(bv, doc_num_expected)) {
692
+ doc_num_expected++;
693
+ }
694
+ Aiequal(doc_num_expected, tde->doc_num(tde));
695
+ if (Aiequal(3, tde->freq(tde))) {
696
+ for (i = 0; i < 3; i++) {
697
+ Aiequal(i, tde->next_position(tde));
698
+ }
699
+ }
700
+ doc_num_expected++;
701
+ }
702
+ tde->close(tde);
703
+
704
+ frt_bv_destroy(bv);
705
+ frt_is_close(frq_in);
706
+ frt_is_close(prx_in);
707
+ frt_tir_close(tir);
708
+ frt_sfi_close(sfi);
709
+ frt_si_deref(si);
710
+ }
711
+
712
+ /****************************************************************************
713
+ *
714
+ * Index
715
+ *
716
+ ****************************************************************************/
717
+
718
+ static void test_index_create(TestCase *tc, void *data)
719
+ {
720
+ FrtStore *store = (FrtStore *)data;
721
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
722
+ (void)tc;
723
+
724
+ store->clear_all(store);
725
+ Assert(!store->exists(store, "segments"),
726
+ "segments shouldn't exist yet");
727
+ frt_index_create(store, fis);
728
+ Assert(store->exists(store, "segments"),
729
+ "segments should now exist");
730
+ frt_fis_deref(fis);
731
+ }
732
+
733
+ static void test_index_version(TestCase *tc, void *data)
734
+ {
735
+ frt_u64 version;
736
+ FrtStore *store = (FrtStore *)data;
737
+ FrtIndexWriter *iw = create_book_iw(store);
738
+ FrtIndexReader *ir;
739
+
740
+ add_document_with_fields(iw, 0);
741
+ Atrue(frt_index_is_locked(store)); /* writer open, so dir is locked */
742
+ frt_iw_close(iw);
743
+ Atrue(!frt_index_is_locked(store));
744
+ ir = frt_ir_open(store);
745
+ Atrue(!frt_index_is_locked(store)); /* reader only, no lock */
746
+ version = frt_sis_read_current_version(store);
747
+ frt_ir_close(ir);
748
+
749
+ /* modify index and check version has been incremented: */
750
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), &frt_default_config);
751
+ add_document_with_fields(iw, 1);
752
+ frt_iw_close(iw);
753
+ ir = frt_ir_open(store);
754
+ Atrue(version < frt_sis_read_current_version(store));
755
+ Atrue(frt_ir_is_latest(ir));
756
+ frt_ir_close(ir);
757
+ }
758
+
759
+ static void test_index_undelete_all_after_close(TestCase *tc, void *data)
760
+ {
761
+ FrtStore *store = (FrtStore *)data;
762
+ FrtIndexWriter *iw = create_book_iw(store);
763
+ FrtIndexReader *ir;
764
+ add_document_with_fields(iw, 0);
765
+ add_document_with_fields(iw, 1);
766
+ frt_iw_close(iw);
767
+ ir = frt_ir_open(store);
768
+ frt_ir_delete_doc(ir, 0);
769
+ frt_ir_delete_doc(ir, 1);
770
+ frt_ir_close(ir);
771
+ ir = frt_ir_open(store);
772
+ frt_ir_undelete_all(ir);
773
+ Aiequal(2, ir->num_docs(ir)); /* nothing has really been deleted */
774
+ frt_ir_close(ir);
775
+ ir = frt_ir_open(store);
776
+ Aiequal(2, ir->num_docs(ir)); /* nothing has really been deleted */
777
+ Atrue(frt_ir_is_latest(ir));
778
+ frt_ir_close(ir);
779
+ }
780
+
781
+ /****************************************************************************
782
+ *
783
+ * IndexWriter
784
+ *
785
+ ****************************************************************************/
786
+
787
+ static void test_fld_inverter(TestCase *tc, void *data)
788
+ {
789
+ FrtStore *store = (FrtStore *)data;
790
+ FrtHash *plists;
791
+ FrtHash *curr_plists;
792
+ FrtPosting *p;
793
+ FrtPostingList *pl;
794
+ FrtDocWriter *dw;
795
+ FrtIndexWriter *iw = create_book_iw(store);
796
+ FrtDocField *df;
797
+
798
+ dw = frt_dw_open(iw, frt_sis_new_segment(iw->sis, 0, iw->store));
799
+
800
+ df = frt_df_new(rb_intern("no tv"));
801
+ frt_df_add_data(df, (char *)"one two three four five two three four five three four five four five");
802
+ frt_df_add_data(df, (char *)"ichi ni san yon go ni san yon go san yon go yon go go");
803
+ frt_df_add_data(df, (char *)"The quick brown fox jumped over five lazy dogs");
804
+
805
+ curr_plists = frt_dw_invert_field(
806
+ dw,
807
+ frt_dw_get_fld_inv(dw, frt_fis_get_or_add_field(dw->fis, df->name)),
808
+ df);
809
+
810
+ Aiequal(18, curr_plists->size);
811
+
812
+ plists = ((FrtFieldInverter *)frt_h_get_int(
813
+ dw->fields, frt_fis_get_field(dw->fis, df->name)->number))->plists;
814
+
815
+
816
+ pl = (FrtPostingList *)frt_h_get(curr_plists, "one");
817
+ if (Apnotnull(pl)) {
818
+ Asequal("one", pl->term);
819
+ Aiequal(3, pl->term_len);
820
+
821
+ p = pl->last;
822
+ Aiequal(1, p->freq);
823
+ Apequal(p->first_occ, pl->last_occ);
824
+ Apnull(p->first_occ->next);
825
+ Aiequal(0, p->first_occ->pos);
826
+ Apequal(pl, ((FrtPostingList *)frt_h_get(plists, "one")));
827
+ }
828
+
829
+ pl = (FrtPostingList *)frt_h_get(curr_plists, "five");
830
+ if (Apnotnull(pl)) {
831
+ Asequal("five", pl->term);
832
+ Aiequal(4, pl->term_len);
833
+ Apnull(pl->last_occ->next);
834
+ p = pl->last;
835
+ Aiequal(5, p->freq);
836
+ Aiequal(4, p->first_occ->pos);
837
+ Aiequal(8, p->first_occ->next->pos);
838
+ Aiequal(11, p->first_occ->next->next->pos);
839
+ Aiequal(13, p->first_occ->next->next->next->pos);
840
+ Aiequal(35, p->first_occ->next->next->next->next->pos);
841
+ Apequal(pl, ((FrtPostingList *)frt_h_get(plists, "five")));
842
+ }
843
+
844
+ frt_df_destroy(df);
845
+
846
+ df = frt_df_new(rb_intern("no tv"));
847
+ frt_df_add_data(df, (char *)"seven new words and six old ones");
848
+ frt_df_add_data(df, (char *)"ichi ni one two quick dogs");
849
+
850
+ dw->doc_num++;
851
+ frt_dw_reset_postings(dw->curr_plists);
852
+
853
+ curr_plists = frt_dw_invert_field(
854
+ dw,
855
+ frt_dw_get_fld_inv(dw, frt_fis_get_or_add_field(dw->fis, df->name)),
856
+ df);
857
+
858
+ Aiequal(13, curr_plists->size);
859
+
860
+ pl = (FrtPostingList *)frt_h_get(curr_plists, "one");
861
+ if (Apnotnull(pl)) {
862
+ Asequal("one", pl->term);
863
+ Aiequal(3, pl->term_len);
864
+
865
+ p = pl->first;
866
+ Aiequal(1, p->freq);
867
+ Apnull(p->first_occ->next);
868
+ Aiequal(0, p->first_occ->pos);
869
+
870
+ p = pl->last;
871
+ Aiequal(1, p->freq);
872
+ Apequal(p->first_occ, pl->last_occ);
873
+ Apnull(p->first_occ->next);
874
+ Aiequal(9, p->first_occ->pos);
875
+ Apequal(pl, ((FrtPostingList *)frt_h_get(plists, "one")));
876
+ }
877
+
878
+ frt_df_destroy(df);
879
+
880
+ frt_dw_close(dw);
881
+ frt_iw_close(iw);
882
+ }
883
+
884
+ #define NUM_POSTINGS TEST_WORD_LIST_SIZE
885
+ static void test_postings_sorter(TestCase *tc, void *data)
886
+ {
887
+ int i;
888
+ FrtPostingList plists[NUM_POSTINGS], *p_ptr[NUM_POSTINGS];
889
+ (void)data, (void)tc;
890
+ for (i = 0; i < NUM_POSTINGS; i++) {
891
+ plists[i].term = (char *)test_word_list[i];
892
+ p_ptr[i] = &plists[i];
893
+ }
894
+
895
+ qsort(p_ptr, NUM_POSTINGS, sizeof(FrtPostingList *),
896
+ (int (*)(const void *, const void *))&frt_pl_cmp);
897
+
898
+ for (i = 1; i < NUM_POSTINGS; i++) {
899
+ Assert(strcmp(p_ptr[i - 1]->term, p_ptr[i]->term) <= 0,
900
+ "\"%s\" > \"%s\"", p_ptr[i - 1]->term, p_ptr[i]->term);
901
+ }
902
+ }
903
+
904
+ static void test_iw_add_doc(TestCase *tc, void *data)
905
+ {
906
+ FrtStore *store = (FrtStore *)data;
907
+ FrtIndexWriter *iw = create_book_iw(store);
908
+ FrtDocument **docs = prep_book_list();
909
+
910
+ frt_iw_add_doc(iw, docs[0]);
911
+ Aiequal(1, frt_iw_doc_count(iw));
912
+ Assert(!store->exists(store, "_0.cfs"),
913
+ "data shouldn't have been written yet");
914
+ frt_iw_commit(iw);
915
+ Assert(store->exists(store, "_0.cfs"), "data should now be written");
916
+ frt_iw_close(iw);
917
+ Assert(store->exists(store, "_0.cfs"), "data should still be there");
918
+
919
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), &frt_default_config);
920
+ frt_iw_add_doc(iw, docs[1]);
921
+ Aiequal(2, frt_iw_doc_count(iw));
922
+ Assert(!store->exists(store, "_1.cfs"),
923
+ "data shouldn't have been written yet");
924
+ Assert(store->exists(store, "_0.cfs"), "data should still be there");
925
+ frt_iw_commit(iw);
926
+ Assert(store->exists(store, "_1.cfs"), "data should now be written");
927
+ frt_iw_close(iw);
928
+ Assert(store->exists(store, "_1.cfs"), "data should still be there");
929
+ Assert(store->exists(store, "_0.cfs"), "data should still be there");
930
+
931
+ destroy_docs(docs, BOOK_LIST_LENGTH);
932
+ }
933
+
934
+ /*
935
+ * Make sure we can open an index for create even when a
936
+ * reader holds it open (this fails pre lock-less
937
+ * commits on windows):
938
+ */
939
+ static void test_create_with_reader(TestCase *tc, void *data)
940
+ {
941
+ FrtStore *store = frt_open_fs_store(TEST_DIR);
942
+ (void)data;
943
+ FrtIndexWriter *iw;
944
+ FrtIndexReader *ir, *ir2;
945
+ FrtDocument *doc = prep_book();
946
+ store->clear_all(store);
947
+
948
+ /* add one document & close writer */
949
+ iw = create_book_iw(store);
950
+ frt_iw_add_doc(iw, doc);
951
+ frt_iw_close(iw);
952
+
953
+ /* now open reader: */
954
+ ir = frt_ir_open(store);
955
+ Aiequal(1, ir->num_docs(ir));
956
+
957
+ /* now open index for create: */
958
+ iw = create_book_iw(store);
959
+ Aiequal(0, frt_iw_doc_count(iw));
960
+ frt_iw_add_doc(iw, doc);
961
+ frt_iw_close(iw);
962
+
963
+ Aiequal(1, ir->num_docs(ir));
964
+ ir2 = frt_ir_open(store);
965
+ Aiequal(1, ir2->num_docs(ir));
966
+ frt_ir_close(ir);
967
+ frt_ir_close(ir2);
968
+ store->clear_all(store);
969
+ frt_store_deref(store);
970
+ frt_doc_destroy(doc);
971
+ }
972
+
973
+ /*
974
+ * Simulate a writer that crashed while writing segments
975
+ * file: make sure we can still open the index (ie,
976
+ * gracefully fallback to the previous segments file),
977
+ * and that we can add to the index:
978
+ */
979
+ static void test_simulated_crashed_writer(TestCase *tc, void *data)
980
+ {
981
+ int i;
982
+ long gen;
983
+ off_t length;
984
+ FrtStore *store = (FrtStore *)data;
985
+ FrtIndexWriter *iw;
986
+ FrtIndexReader *ir;
987
+ char file_name_in[FRT_SEGMENT_NAME_MAX_LENGTH];
988
+ char file_name_out[FRT_SEGMENT_NAME_MAX_LENGTH];
989
+ FrtInStream *is;
990
+ FrtOutStream *os;
991
+ FrtDocument **docs = prep_book_list();
992
+ FrtConfig config = frt_default_config;
993
+ config.max_buffered_docs = 3;
994
+
995
+ iw = create_book_iw_conf(store, &config);
996
+ for (i = 0; i < BOOK_LIST_LENGTH; i++) {
997
+ frt_iw_add_doc(iw, docs[i]);
998
+ }
999
+
1000
+ /* close */
1001
+ frt_iw_close(iw);
1002
+
1003
+ gen = frt_sis_current_segment_generation(store);
1004
+ /* segment generation should be > 1 */
1005
+ Atrue(gen > 1);
1006
+
1007
+ /* Make the next segments file, with last byte
1008
+ * missing, to simulate a writer that crashed while
1009
+ * writing segments file: */
1010
+ frt_sis_curr_seg_file_name(file_name_in, store);
1011
+ frt_fn_for_generation(file_name_out, FRT_SEGMENTS_FILE_NAME, NULL, 1 + gen);
1012
+ is = store->open_input(store, file_name_in);
1013
+ os = store->new_output(store, file_name_out);
1014
+ length = frt_is_length(is);
1015
+ for(i = 0; i < length - 1; i++) {
1016
+ frt_os_write_byte(os, frt_is_read_byte(is));
1017
+ }
1018
+ frt_is_close(is);
1019
+ frt_os_close(os);
1020
+
1021
+ ir = frt_ir_open(store);
1022
+ frt_ir_close(ir);
1023
+
1024
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), &config);
1025
+
1026
+ /* add all books */
1027
+ for (i = 0; i < BOOK_LIST_LENGTH; i++) {
1028
+ frt_iw_add_doc(iw, docs[i]);
1029
+ }
1030
+
1031
+ destroy_docs(docs, BOOK_LIST_LENGTH);
1032
+ frt_iw_close(iw);
1033
+ }
1034
+
1035
+ /*
1036
+ * Simulate a corrupt index by removing last byte of
1037
+ * latest segments file and make sure we get an
1038
+ * IOException trying to open the index:
1039
+ */
1040
+ static void test_simulated_corrupt_index1(TestCase *tc, void *data)
1041
+ {
1042
+ int i;
1043
+ long gen;
1044
+ off_t length;
1045
+ FrtStore *store = (FrtStore *)data;
1046
+ FrtIndexWriter *iw;
1047
+ FrtIndexReader *ir;
1048
+ char file_name_in[FRT_SEGMENT_NAME_MAX_LENGTH];
1049
+ char file_name_out[FRT_SEGMENT_NAME_MAX_LENGTH];
1050
+ FrtInStream *is;
1051
+ FrtOutStream *os;
1052
+ FrtDocument **docs = prep_book_list();
1053
+ FrtConfig config = frt_default_config;
1054
+ config.max_buffered_docs = 3;
1055
+
1056
+ iw = create_book_iw_conf(store, &config);
1057
+ for (i = 0; i < BOOK_LIST_LENGTH; i++) {
1058
+ frt_iw_add_doc(iw, docs[i]);
1059
+ }
1060
+
1061
+ /* close */
1062
+ frt_iw_close(iw);
1063
+
1064
+ gen = frt_sis_current_segment_generation(store);
1065
+ /* segment generation should be > 1 */
1066
+ Atrue(gen > 1);
1067
+
1068
+ /* Make the next segments file, with last byte
1069
+ * missing, to simulate a writer that crashed while
1070
+ * writing segments file: */
1071
+ frt_sis_curr_seg_file_name(file_name_in, store);
1072
+ frt_fn_for_generation(file_name_out, FRT_SEGMENTS_FILE_NAME, "", 1 + gen);
1073
+ is = store->open_input(store, file_name_in);
1074
+ os = store->new_output(store, file_name_out);
1075
+ length = frt_is_length(is);
1076
+ for(i = 0; i < length - 1; i++) {
1077
+ frt_os_write_byte(os, frt_is_read_byte(is));
1078
+ }
1079
+ frt_is_close(is);
1080
+ frt_os_close(os);
1081
+ store->remove(store, file_name_in);
1082
+
1083
+ FRT_TRY
1084
+ ir = frt_ir_open(store);
1085
+ frt_ir_close(ir);
1086
+ Afail("reader should have failed to open on a crashed index");
1087
+ break;
1088
+ case FRT_IO_ERROR:
1089
+ FRT_HANDLED();
1090
+ break;
1091
+ default:
1092
+ Afail("reader should have raised an FRT_IO_ERROR");
1093
+ FRT_HANDLED();
1094
+ FRT_XENDTRY
1095
+ destroy_docs(docs, BOOK_LIST_LENGTH);
1096
+ }
1097
+
1098
+ /*
1099
+ * Simulate a corrupt index by removing one of the cfs
1100
+ * files and make sure we get an IOException trying to
1101
+ * open the index:
1102
+ */
1103
+ static void test_simulated_corrupt_index2(TestCase *tc, void *data)
1104
+ {
1105
+ int i;
1106
+ long gen;
1107
+ FrtStore *store = (FrtStore *)data;
1108
+ FrtIndexWriter *iw;
1109
+ FrtIndexReader *ir;
1110
+ FrtDocument **docs = prep_book_list();
1111
+ FrtConfig config = frt_default_config;
1112
+ config.max_buffered_docs = 10;
1113
+
1114
+ iw = create_book_iw_conf(store, &config);
1115
+ for (i = 0; i < BOOK_LIST_LENGTH; i++) {
1116
+ frt_iw_add_doc(iw, docs[i]);
1117
+ }
1118
+
1119
+ /* close */
1120
+ frt_iw_close(iw);
1121
+
1122
+ gen = frt_sis_current_segment_generation(store);
1123
+ /* segment generation should be > 1 */
1124
+ Atrue(gen > 1);
1125
+
1126
+ Atrue(store->exists(store, "_0.cfs"));
1127
+ store->remove(store, "_0.cfs");
1128
+
1129
+ FRT_TRY
1130
+ ir = frt_ir_open(store);
1131
+ frt_ir_close(ir);
1132
+ Afail("reader should have failed to open on a crashed index");
1133
+ break;
1134
+ case FRT_IO_ERROR:
1135
+ FRT_HANDLED();
1136
+ FRT_XCATCHALL
1137
+ Afail("reader should have raised an FRT_IO_ERROR");
1138
+ FRT_HANDLED();
1139
+ FRT_XENDTRY
1140
+ destroy_docs(docs, BOOK_LIST_LENGTH);
1141
+ }
1142
+
1143
+ static void test_iw_add_docs(TestCase *tc, void *data)
1144
+ {
1145
+ int i;
1146
+ FrtConfig config = frt_default_config;
1147
+ FrtStore *store = (FrtStore *)data;
1148
+ FrtIndexWriter *iw;
1149
+ FrtDocument **docs = prep_book_list();
1150
+ config.merge_factor = 4;
1151
+ config.max_buffered_docs = 3;
1152
+
1153
+ iw = create_book_iw_conf(store, &config);
1154
+ for (i = 0; i < BOOK_LIST_LENGTH; i++) {
1155
+ frt_iw_add_doc(iw, docs[i]);
1156
+ }
1157
+ frt_iw_optimize(iw);
1158
+ Aiequal(BOOK_LIST_LENGTH, frt_iw_doc_count(iw));
1159
+
1160
+ frt_iw_close(iw);
1161
+ destroy_docs(docs, BOOK_LIST_LENGTH);
1162
+ if (!Aiequal(3, store->count(store))) {
1163
+ char *buf = frt_store_to_s(store);
1164
+ Tmsg("To many files: %s\n", buf);
1165
+ free(buf);
1166
+ }
1167
+ }
1168
+
1169
+ void test_iw_add_empty_tv(TestCase *tc, void *data)
1170
+ {
1171
+ FrtStore *store = (FrtStore *)data;
1172
+ FrtIndexWriter *iw;
1173
+ FrtDocument *doc;
1174
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_NO, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
1175
+ frt_fis_add_field(fis, frt_fi_new(rb_intern("no_tv"), FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_NO));
1176
+ frt_index_create(store, fis);
1177
+ frt_fis_deref(fis);
1178
+
1179
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), &frt_default_config);
1180
+ doc = frt_doc_new();
1181
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(rb_intern("tv1")), (char *)""));
1182
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(rb_intern("tv2")), (char *)""));
1183
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(rb_intern("no_tv")), (char *)"one two three"));
1184
+
1185
+ frt_iw_add_doc(iw, doc);
1186
+ frt_iw_commit(iw);
1187
+ Aiequal(1, frt_iw_doc_count(iw));
1188
+ frt_iw_close(iw);
1189
+ frt_doc_destroy(doc);
1190
+ }
1191
+
1192
+ static void test_iw_del_terms(TestCase *tc, void *data)
1193
+ {
1194
+ int i;
1195
+ FrtConfig config = frt_default_config;
1196
+ FrtStore *store = (FrtStore *)data;
1197
+ FrtIndexWriter *iw;
1198
+ FrtIndexReader *ir;
1199
+ FrtDocument **docs = prep_book_list();
1200
+ const char *terms[3];
1201
+ config.merge_factor = 4;
1202
+ config.max_buffered_docs = 3;
1203
+
1204
+ iw = create_book_iw_conf(store, &config);
1205
+ for (i = 0; i < BOOK_LIST_LENGTH; i++) {
1206
+ frt_iw_add_doc(iw, docs[i]);
1207
+ }
1208
+ Aiequal(BOOK_LIST_LENGTH, frt_iw_doc_count(iw));
1209
+ frt_iw_close(iw);
1210
+ destroy_docs(docs, BOOK_LIST_LENGTH);
1211
+
1212
+ ir = frt_ir_open(store);
1213
+ Aiequal(BOOK_LIST_LENGTH, ir->num_docs(ir));
1214
+ Aiequal(BOOK_LIST_LENGTH, ir->max_doc(ir));
1215
+ frt_ir_close(ir);
1216
+
1217
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), &config);
1218
+ frt_iw_delete_term(iw, title, "State");
1219
+ frt_iw_close(iw);
1220
+
1221
+ ir = frt_ir_open(store);
1222
+ Aiequal(BOOK_LIST_LENGTH - 1, ir->num_docs(ir));
1223
+ Aiequal(BOOK_LIST_LENGTH, ir->max_doc(ir));
1224
+ frt_ir_close(ir);
1225
+
1226
+ /* test deleting multiple Terms */
1227
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), &config);
1228
+ frt_iw_delete_term(iw, title, "The");
1229
+ frt_iw_delete_term(iw, title, "Blind");
1230
+ terms[0] = "Berger";
1231
+ terms[1] = "Middleton";
1232
+ terms[2] = "DBC";
1233
+ frt_iw_delete_terms(iw, author, (char **)terms, 3);
1234
+ frt_iw_close(iw);
1235
+
1236
+ ir = frt_ir_open(store);
1237
+ Aiequal(BOOK_LIST_LENGTH - 17, ir->num_docs(ir));
1238
+ Aiequal(BOOK_LIST_LENGTH, ir->max_doc(ir));
1239
+ Atrue(!ir->is_deleted(ir, 0));
1240
+ Atrue(ir->is_deleted(ir, 1));
1241
+ Atrue(ir->is_deleted(ir, 2));
1242
+ Atrue(ir->is_deleted(ir, 3));
1243
+ Atrue(ir->is_deleted(ir, 4));
1244
+ Atrue(ir->is_deleted(ir, 5));
1245
+ Atrue(ir->is_deleted(ir, 6));
1246
+ Atrue(!ir->is_deleted(ir, 7));
1247
+ Atrue(!ir->is_deleted(ir, 9));
1248
+ Atrue(ir->is_deleted(ir, 10));
1249
+ Atrue(!ir->is_deleted(ir, 11));
1250
+ Atrue(!ir->is_deleted(ir, 16));
1251
+ Atrue(ir->is_deleted(ir, 17));
1252
+ Atrue(ir->is_deleted(ir, 18));
1253
+ Atrue(ir->is_deleted(ir, 21));
1254
+ Atrue(ir->is_deleted(ir, 23));
1255
+ Atrue(ir->is_deleted(ir, 24));
1256
+ Atrue(ir->is_deleted(ir, 28));
1257
+ Atrue(ir->is_deleted(ir, 30));
1258
+ Atrue(ir->is_deleted(ir, 33));
1259
+ Atrue(ir->is_deleted(ir, 35));
1260
+ Atrue(ir->is_deleted(ir, 36));
1261
+ frt_ir_commit(ir);
1262
+
1263
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), &config);
1264
+ frt_iw_optimize(iw);
1265
+ frt_iw_close(iw);
1266
+
1267
+ frt_ir_close(ir);
1268
+
1269
+ ir = frt_ir_open(store);
1270
+ Aiequal(BOOK_LIST_LENGTH - 17, ir->num_docs(ir));
1271
+ Aiequal(BOOK_LIST_LENGTH - 17, ir->max_doc(ir));
1272
+ frt_ir_close(ir);
1273
+ }
1274
+
1275
+ /****************************************************************************
1276
+ *
1277
+ * FrtIndexReader
1278
+ *
1279
+ ****************************************************************************/
1280
+
1281
/*
 * Reader configurations understood by reader_test_env_new(). They are kept
 * as int objects (not an enum) because tests such as test_ir_norms read the
 * type through an int pointer: *((int *)data).
 */
static int segment_reader_type        = 0, /* one store, optimized */
           multi_reader_type          = 1, /* one store, not optimized */
           multi_external_reader_type = 2, /* many stores */
           add_indexes_reader_type    = 3; /* many stores merged into one */
1285
+
1286
+ typedef struct ReaderTestEnvironment {
1287
+ FrtStore **stores;
1288
+ int store_cnt;
1289
+ } ReaderTestEnvironment;
1290
+
1291
+ static void reader_test_env_destroy(ReaderTestEnvironment *rte)
1292
+ {
1293
+ int i;
1294
+ for (i = 0; i < rte->store_cnt; i++) {
1295
+ frt_store_deref(rte->stores[i]);
1296
+ }
1297
+ free(rte->stores);
1298
+ free(rte);
1299
+ }
1300
+
1301
+ static FrtIndexReader *reader_test_env_ir_open(ReaderTestEnvironment *rte)
1302
+ {
1303
+ if (rte->store_cnt == 1) {
1304
+ return frt_ir_open(rte->stores[0]);
1305
+ }
1306
+ else {
1307
+ FrtIndexReader **sub_readers = FRT_ALLOC_N(FrtIndexReader *, rte->store_cnt);
1308
+ int i;
1309
+ for (i = 0; i < rte->store_cnt; i++) {
1310
+ sub_readers[i] = frt_ir_open(rte->stores[i]);
1311
+ }
1312
+ return (frt_mr_open(sub_readers, rte->store_cnt));
1313
+ }
1314
+ }
1315
+
1316
+ static ReaderTestEnvironment *reader_test_env_new(int type)
1317
+ {
1318
+ int i, j;
1319
+ FrtConfig config = frt_default_config;
1320
+ FrtIndexWriter *iw;
1321
+ FrtDocument **docs = prep_ir_test_docs();
1322
+ ReaderTestEnvironment *rte = FRT_ALLOC(ReaderTestEnvironment);
1323
+ int store_cnt = rte->store_cnt
1324
+ = (type >= multi_external_reader_type) ? 64 : 1;
1325
+ int doc_cnt = IR_TEST_DOC_CNT / store_cnt;
1326
+
1327
+ rte->stores = FRT_ALLOC_N(FrtStore *, store_cnt);
1328
+ for (i = 0; i < store_cnt; i++) {
1329
+ FrtStore *store = rte->stores[i] = frt_open_ram_store();
1330
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES,
1331
+ FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
1332
+ int start_doc = i * doc_cnt;
1333
+ int end_doc = (i + 1) * doc_cnt;
1334
+ if (end_doc > IR_TEST_DOC_CNT) {
1335
+ end_doc = IR_TEST_DOC_CNT;
1336
+ }
1337
+ frt_index_create(store, fis);
1338
+ frt_fis_deref(fis);
1339
+ config.max_buffered_docs = 3;
1340
+
1341
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), &config);
1342
+
1343
+ for (j = start_doc; j < end_doc; j++) {
1344
+ int k;
1345
+ FrtDocument *doc = docs[j];
1346
+ /* add fields when needed. This is to make the FrtFieldInfos objects
1347
+ * different for multi_external_reader */
1348
+ for (k = 0; k < doc->size; k++) {
1349
+ FrtDocField *df = doc->fields[k];
1350
+ fis = iw->fis;
1351
+ if (NULL == frt_fis_get_field(fis, df->name)) {
1352
+ if (author == df->name) {
1353
+ frt_fis_add_field(fis, frt_fi_new(author, FRT_STORE_YES, FRT_INDEX_YES,
1354
+ FRT_TERM_VECTOR_WITH_POSITIONS));
1355
+ } else if (title == df->name) {
1356
+ frt_fis_add_field(fis, frt_fi_new(title, FRT_STORE_YES,
1357
+ FRT_INDEX_UNTOKENIZED,
1358
+ FRT_TERM_VECTOR_WITH_OFFSETS));
1359
+ } else if (year == df->name) {
1360
+ frt_fis_add_field(fis, frt_fi_new(year, FRT_STORE_YES,
1361
+ FRT_INDEX_UNTOKENIZED,
1362
+ FRT_TERM_VECTOR_NO));
1363
+ } else if (text == df->name) {
1364
+ frt_fis_add_field(fis, frt_fi_new(text, FRT_STORE_NO, FRT_INDEX_YES,
1365
+ FRT_TERM_VECTOR_NO));
1366
+ } else if (compressed_field == df->name) {
1367
+ frt_fis_add_field(fis, frt_fi_new(compressed_field,
1368
+ FRT_STORE_YES,
1369
+ FRT_INDEX_YES,
1370
+ FRT_TERM_VECTOR_NO));
1371
+ }
1372
+ }
1373
+ }
1374
+ frt_iw_add_doc(iw, doc);
1375
+ }
1376
+
1377
+ if (type == segment_reader_type) {
1378
+ frt_iw_optimize(iw);
1379
+ }
1380
+ frt_iw_close(iw);
1381
+ }
1382
+
1383
+ if (type == add_indexes_reader_type) {
1384
+ /* Prepare store for Add Indexes test */
1385
+ FrtStore *store = frt_open_ram_store();
1386
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES,
1387
+ FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
1388
+ FrtIndexReader **readers = FRT_ALLOC_N(FrtIndexReader *, rte->store_cnt);
1389
+ int i;
1390
+ for (i = 0; i < rte->store_cnt; i++) {
1391
+ readers[i] = frt_ir_open(rte->stores[i]);
1392
+ }
1393
+ frt_index_create(store, fis);
1394
+ frt_fis_deref(fis);
1395
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), &config);
1396
+ frt_iw_add_readers(iw, readers, rte->store_cnt - 10);
1397
+ frt_iw_close(iw);
1398
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), &config);
1399
+ frt_iw_add_readers(iw, readers + (rte->store_cnt - 10), 10);
1400
+ frt_iw_close(iw);
1401
+ for (i = 0; i < rte->store_cnt; i++) {
1402
+ frt_ir_close(readers[i]);
1403
+ frt_store_deref(rte->stores[i]);
1404
+ }
1405
+ free(readers);
1406
+ rte->stores[0] = store;
1407
+ rte->store_cnt = 1;
1408
+ }
1409
+
1410
+ destroy_docs(docs, IR_TEST_DOC_CNT);
1411
+ return rte;
1412
+ }
1413
+
1414
+ static void write_ir_test_docs(FrtStore *store)
1415
+ {
1416
+ int i;
1417
+ FrtConfig config = frt_default_config;
1418
+ FrtIndexWriter *iw;
1419
+ FrtDocument **docs = prep_ir_test_docs();
1420
+
1421
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES,
1422
+ FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
1423
+ frt_fis_add_field(fis, frt_fi_new(author, FRT_STORE_YES, FRT_INDEX_YES,
1424
+ FRT_TERM_VECTOR_WITH_POSITIONS));
1425
+ frt_fis_add_field(fis, frt_fi_new(title, FRT_STORE_YES, FRT_INDEX_UNTOKENIZED,
1426
+ FRT_TERM_VECTOR_WITH_OFFSETS));
1427
+ frt_fis_add_field(fis, frt_fi_new(year, FRT_STORE_YES, FRT_INDEX_UNTOKENIZED,
1428
+ FRT_TERM_VECTOR_NO));
1429
+ frt_fis_add_field(fis, frt_fi_new(text, FRT_STORE_NO, FRT_INDEX_YES,
1430
+ FRT_TERM_VECTOR_NO));
1431
+ frt_index_create(store, fis);
1432
+ frt_fis_deref(fis);
1433
+ config.max_buffered_docs = 5;
1434
+
1435
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), &config);
1436
+
1437
+ for (i = 0; i < IR_TEST_DOC_CNT; i++) {
1438
+ frt_iw_add_doc(iw, docs[i]);
1439
+ }
1440
+ frt_iw_close(iw);
1441
+
1442
+ destroy_docs(docs, IR_TEST_DOC_CNT);
1443
+ }
1444
+
1445
+ static void test_ir_open_empty_index(TestCase *tc, void *data)
1446
+ {
1447
+ FrtStore *store = (FrtStore *)data;
1448
+ store->clear_all(store);
1449
+ FRT_TRY
1450
+ frt_ir_close(frt_ir_open(store));
1451
+ Afail("IndexReader should have failed when opening empty index");
1452
+ break;
1453
+ case FRT_FILE_NOT_FOUND_ERROR:
1454
+ FRT_HANDLED();
1455
+ break;
1456
+ default:
1457
+ Afail("IndexReader should have raised FileNotfound Exception");
1458
+ FRT_HANDLED();
1459
+ FRT_XENDTRY
1460
+ }
1461
+
1462
+ static void test_ir_basic_ops(TestCase *tc, void *data)
1463
+ {
1464
+ FrtIndexReader *ir = (FrtIndexReader *)data;
1465
+
1466
+ Aiequal(IR_TEST_DOC_CNT, ir->num_docs(ir));
1467
+ Aiequal(IR_TEST_DOC_CNT, ir->max_doc(ir));
1468
+
1469
+ Aiequal(4, ir->doc_freq(ir, frt_fis_get_field(ir->fis, body)->number, "Wally"));
1470
+ Atrue(frt_ir_is_latest(ir));
1471
+ }
1472
+
1473
+ static void test_ir_term_docpos_enum_skip_to(TestCase *tc,
1474
+ FrtTermDocEnum *tde,
1475
+ int field_num)
1476
+ {
1477
+ /* test skip_to working skip interval */
1478
+ tde->seek(tde, field_num, "skip");
1479
+
1480
+ Atrue(tde->skip_to(tde, 10));
1481
+ Aiequal(22, tde->doc_num(tde));
1482
+ Aiequal(22, tde->freq(tde));
1483
+
1484
+ Atrue(tde->skip_to(tde, 100));
1485
+ Aiequal(100, tde->doc_num(tde));
1486
+ Aiequal(100, tde->freq(tde));
1487
+
1488
+ tde->seek(tde, field_num, "skip");
1489
+ Atrue(tde->skip_to(tde, 85));
1490
+ Aiequal(85, tde->doc_num(tde));
1491
+ Aiequal(85, tde->freq(tde));
1492
+
1493
+ Atrue(tde->skip_to(tde, 200));
1494
+ Aiequal(200, tde->doc_num(tde));
1495
+ Aiequal(200, tde->freq(tde));
1496
+
1497
+ Atrue(tde->skip_to(tde, 255));
1498
+ Aiequal(255, tde->doc_num(tde));
1499
+ Aiequal(255, tde->freq(tde));
1500
+
1501
+ Atrue(!tde->skip_to(tde, 256));
1502
+
1503
+ tde->seek(tde, field_num, "skip");
1504
+
1505
+ Atrue(!tde->skip_to(tde, 256));
1506
+ }
1507
+
1508
/*
 * Assigns a, b and c to the first three elements of array x.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement and
 * stays correct under an unbraced if/else; invoke it with a trailing
 * semicolon. Arguments are parenthesized so expressions can be passed safely.
 */
#define AA3(x, a, b, c) \
    do { (x)[0] = (a); (x)[1] = (b); (x)[2] = (c); } while (0)
1509
+
1510
+ static void test_ir_term_enum(TestCase *tc, void *data)
1511
+ {
1512
+ FrtIndexReader *ir = (FrtIndexReader *)data;
1513
+ FrtTermEnum *te = frt_ir_terms(ir, author);
1514
+
1515
+ Asequal("Leo", te->next(te));
1516
+ Asequal("Leo", te->curr_term);
1517
+ Aiequal(1, te->curr_ti.doc_freq);
1518
+ Asequal("Tolstoy", te->next(te));
1519
+ Asequal("Tolstoy", te->curr_term);
1520
+ Aiequal(1, te->curr_ti.doc_freq);
1521
+ Apnull(te->next(te));
1522
+
1523
+ te->set_field(te, frt_fis_get_field_num(ir->fis, body));
1524
+ Asequal("And", te->next(te));
1525
+ Asequal("And", te->curr_term);
1526
+ Aiequal(1, te->curr_ti.doc_freq);
1527
+
1528
+ Asequal("Not", te->skip_to(te, "Not"));
1529
+ Asequal("Not", te->curr_term);
1530
+ Aiequal(1, te->curr_ti.doc_freq);
1531
+ Asequal("Random", te->next(te));
1532
+ Asequal("Random", te->curr_term);
1533
+ Aiequal(16, te->curr_ti.doc_freq);
1534
+
1535
+ te->set_field(te, frt_fis_get_field_num(ir->fis, text));
1536
+ Asequal("which", te->skip_to(te, "which"));
1537
+ Asequal("which", te->curr_term);
1538
+ Aiequal(1, te->curr_ti.doc_freq);
1539
+ Apnull(te->next(te));
1540
+
1541
+ te->set_field(te, frt_fis_get_field_num(ir->fis, title));
1542
+ Asequal("Shawshank Redemption", te->next(te));
1543
+ Asequal("Shawshank Redemption", te->curr_term);
1544
+ Aiequal(1, te->curr_ti.doc_freq);
1545
+ Asequal("War And Peace", te->next(te));
1546
+ Asequal("War And Peace", te->curr_term);
1547
+ Aiequal(1, te->curr_ti.doc_freq);
1548
+ te->close(te);
1549
+
1550
+ te = frt_ir_terms_from(ir, body, "No");
1551
+ Asequal("Not", te->curr_term);
1552
+ Aiequal(1, te->curr_ti.doc_freq);
1553
+ Asequal("Random", te->next(te));
1554
+ Asequal("Random", te->curr_term);
1555
+ Aiequal(16, te->curr_ti.doc_freq);
1556
+ te->close(te);
1557
+ }
1558
+
1559
+ static void test_ir_term_doc_enum(TestCase *tc, void *data)
1560
+ {
1561
+ FrtIndexReader *ir = (FrtIndexReader *)data;
1562
+
1563
+ FrtTermDocEnum *tde;
1564
+ FrtDocument *doc = frt_ir_get_doc_with_term(ir, tag, "id_test");
1565
+ int docs[3], expected_docs[3];
1566
+ int freqs[3], expected_freqs[3];
1567
+
1568
+ Apnotnull(doc);
1569
+ Asequal("id_test", frt_doc_get_field(doc, tag)->data[0]);
1570
+ Asequal("Some Random Sentence read", frt_doc_get_field(doc, body)->data[0]);
1571
+ frt_doc_destroy(doc);
1572
+
1573
+ /* test scanning */
1574
+ tde = ir_term_docs_for(ir, body, "Wally");
1575
+
1576
+ Atrue(tde->next(tde));
1577
+ Aiequal(0, tde->doc_num(tde));
1578
+ Aiequal(1, tde->freq(tde));
1579
+
1580
+ Atrue(tde->next(tde));
1581
+ Aiequal(5, tde->doc_num(tde));
1582
+ Aiequal(1, tde->freq(tde));
1583
+
1584
+ Atrue(tde->next(tde));
1585
+ Aiequal(18, tde->doc_num(tde));
1586
+ Aiequal(3, tde->freq(tde));
1587
+
1588
+ Atrue(tde->next(tde));
1589
+ Aiequal(20, tde->doc_num(tde));
1590
+ Aiequal(6, tde->freq(tde));
1591
+ Atrue(! tde->next(tde));
1592
+
1593
+ /* test fast read. Use a small array to exercise repeat read */
1594
+ tde->seek(tde, frt_fis_get_field(ir->fis, body)->number, "read");
1595
+ Aiequal(3, tde->read(tde, docs, freqs, 3));
1596
+ AA3(expected_freqs, 1, 2, 4);
1597
+ AA3(expected_docs, 1, 2, 6);
1598
+ Aaiequal(expected_docs, docs, 3);
1599
+ Aaiequal(expected_freqs, freqs, 3);
1600
+
1601
+ Aiequal(3, tde->read(tde, docs, freqs, 3));
1602
+ AA3(expected_docs, 9, 10, 15);
1603
+ AA3(expected_freqs, 3, 1, 1);
1604
+ Aaiequal(expected_docs, docs, 3);
1605
+ Aaiequal(expected_freqs, freqs, 3);
1606
+
1607
+ Aiequal(3, tde->read(tde, docs, freqs, 3));
1608
+ AA3(expected_docs, 16, 17, 20);
1609
+ AA3(expected_freqs, 2, 1, 1);
1610
+ Aaiequal(expected_docs, docs, 3);
1611
+ Aaiequal(expected_freqs, freqs, 3);
1612
+
1613
+ Aiequal(1, tde->read(tde, docs, freqs, 3));
1614
+ expected_docs[0] = 21;
1615
+ expected_freqs[0] = 6;
1616
+ Aaiequal(expected_docs, docs, 1);
1617
+ Aaiequal(expected_freqs, freqs, 1);
1618
+
1619
+ Aiequal(0, tde->read(tde, docs, freqs, 3));
1620
+
1621
+ test_ir_term_docpos_enum_skip_to(tc, tde,
1622
+ frt_fis_get_field(ir->fis, text)->number);
1623
+ tde->close(tde);
1624
+
1625
+ /* test term positions */
1626
+ tde = frt_ir_term_positions_for(ir, body, "read");
1627
+ Aiequal(-1, tde->next_position(tde));
1628
+
1629
+ Atrue(tde->next(tde));
1630
+ Aiequal(1, tde->doc_num(tde));
1631
+ Aiequal(1, tde->freq(tde));
1632
+ Aiequal(3, tde->next_position(tde));
1633
+ Aiequal(-1, tde->next_position(tde));
1634
+
1635
+ Atrue(tde->next(tde));
1636
+ Aiequal(2, tde->doc_num(tde));
1637
+ Aiequal(2, tde->freq(tde));
1638
+ Aiequal(1, tde->next_position(tde));
1639
+ Aiequal(4, tde->next_position(tde));
1640
+
1641
+ Atrue(tde->next(tde));
1642
+ Aiequal(6, tde->doc_num(tde));
1643
+ Aiequal(4, tde->freq(tde));
1644
+ Aiequal(3, tde->next_position(tde));
1645
+ Aiequal(4, tde->next_position(tde));
1646
+
1647
+ Atrue(tde->next(tde));
1648
+ Aiequal(9, tde->doc_num(tde));
1649
+ Aiequal(3, tde->freq(tde));
1650
+ Aiequal(0, tde->next_position(tde));
1651
+ Aiequal(4, tde->next_position(tde));
1652
+
1653
+ Atrue(tde->skip_to(tde, 16));
1654
+ Aiequal(16, tde->doc_num(tde));
1655
+ Aiequal(2, tde->freq(tde));
1656
+ Aiequal(2, tde->next_position(tde));
1657
+
1658
+ Atrue(tde->skip_to(tde, 21));
1659
+ Aiequal(21, tde->doc_num(tde));
1660
+ Aiequal(6, tde->freq(tde));
1661
+ Aiequal(3, tde->next_position(tde));
1662
+ Aiequal(4, tde->next_position(tde));
1663
+ Aiequal(5, tde->next_position(tde));
1664
+ Aiequal(8, tde->next_position(tde));
1665
+ Aiequal(9, tde->next_position(tde));
1666
+ Aiequal(10, tde->next_position(tde));
1667
+
1668
+ Atrue(! tde->next(tde));
1669
+
1670
+ test_ir_term_docpos_enum_skip_to(tc, tde,
1671
+ frt_fis_get_field(ir->fis, text)->number);
1672
+ tde->close(tde);
1673
+ }
1674
+
1675
+ static void test_ir_term_vectors(TestCase *tc, void *data)
1676
+ {
1677
+ FrtIndexReader *ir = (FrtIndexReader *)data;
1678
+
1679
+ FrtTermVector *tv = ir->term_vector(ir, 3, rb_intern("body"));
1680
+ FrtHash *tvs;
1681
+
1682
+ Asequal("body", rb_id2name(tv->field));
1683
+ Aiequal(4, tv->term_cnt);
1684
+ Asequal("word1", tv->terms[0].text);
1685
+ Asequal("word2", tv->terms[1].text);
1686
+ Asequal("word3", tv->terms[2].text);
1687
+ Asequal("word4", tv->terms[3].text);
1688
+ Aiequal(3, tv->terms[0].freq);
1689
+ Aiequal(2, tv->terms[0].positions[0]);
1690
+ Aiequal(4, tv->terms[0].positions[1]);
1691
+ Aiequal(7, tv->terms[0].positions[2]);
1692
+ Aiequal(12, tv->offsets[tv->terms[0].positions[0]].start);
1693
+ Aiequal(17, tv->offsets[tv->terms[0].positions[0]].end);
1694
+ Aiequal(24, tv->offsets[tv->terms[0].positions[1]].start);
1695
+ Aiequal(29, tv->offsets[tv->terms[0].positions[1]].end);
1696
+ Aiequal(42, tv->offsets[tv->terms[0].positions[2]].start);
1697
+ Aiequal(47, tv->offsets[tv->terms[0].positions[2]].end);
1698
+
1699
+ Aiequal(1, tv->terms[1].freq);
1700
+ Aiequal(3, tv->terms[1].positions[0]);
1701
+ Aiequal(18, tv->offsets[tv->terms[1].positions[0]].start);
1702
+ Aiequal(23, tv->offsets[tv->terms[1].positions[0]].end);
1703
+
1704
+ Aiequal(4, tv->terms[2].freq);
1705
+ Aiequal(0, tv->terms[2].positions[0]);
1706
+ Aiequal(5, tv->terms[2].positions[1]);
1707
+ Aiequal(8, tv->terms[2].positions[2]);
1708
+ Aiequal(9, tv->terms[2].positions[3]);
1709
+ Aiequal(0, tv->offsets[tv->terms[2].positions[0]].start);
1710
+ Aiequal(5, tv->offsets[tv->terms[2].positions[0]].end);
1711
+ Aiequal(30, tv->offsets[tv->terms[2].positions[1]].start);
1712
+ Aiequal(35, tv->offsets[tv->terms[2].positions[1]].end);
1713
+ Aiequal(48, tv->offsets[tv->terms[2].positions[2]].start);
1714
+ Aiequal(53, tv->offsets[tv->terms[2].positions[2]].end);
1715
+ Aiequal(54, tv->offsets[tv->terms[2].positions[3]].start);
1716
+ Aiequal(59, tv->offsets[tv->terms[2].positions[3]].end);
1717
+
1718
+ Aiequal(2, tv->terms[3].freq);
1719
+ Aiequal(1, tv->terms[3].positions[0]);
1720
+ Aiequal(6, tv->terms[3].positions[1]);
1721
+ Aiequal(6, tv->offsets[tv->terms[3].positions[0]].start);
1722
+ Aiequal(11, tv->offsets[tv->terms[3].positions[0]].end);
1723
+ Aiequal(36, tv->offsets[tv->terms[3].positions[1]].start);
1724
+ Aiequal(41, tv->offsets[tv->terms[3].positions[1]].end);
1725
+
1726
+ frt_tv_destroy(tv);
1727
+
1728
+ tvs = ir->term_vectors(ir, 3);
1729
+ Aiequal(3, tvs->size);
1730
+ tv = (FrtTermVector *)frt_h_get(tvs, (void *)rb_intern("author"));
1731
+ if (Apnotnull(tv)) {
1732
+ Asequal("author", rb_id2name(tv->field));
1733
+ Aiequal(2, tv->term_cnt);
1734
+ Aiequal(0, tv->offset_cnt);
1735
+ Apnull(tv->offsets);
1736
+ }
1737
+ tv = (FrtTermVector *)frt_h_get(tvs, (void *)rb_intern("body"));
1738
+ if (Apnotnull(tv)) {
1739
+ Asequal("body", rb_id2name(tv->field));
1740
+ Aiequal(4, tv->term_cnt);
1741
+ }
1742
+ tv = (FrtTermVector *)frt_h_get(tvs, (void *)rb_intern("title"));
1743
+ if (Apnotnull(tv)) {
1744
+ Asequal("title", rb_id2name(tv->field));
1745
+ Aiequal(1, tv->term_cnt); /* untokenized */
1746
+ Aiequal(1, tv->offset_cnt);
1747
+ Asequal("War And Peace", tv->terms[0].text);
1748
+ Apnull(tv->terms[0].positions);
1749
+ Aiequal(0, tv->offsets[0].start);
1750
+ Aiequal(13, tv->offsets[0].end);
1751
+ }
1752
+ frt_h_destroy(tvs);
1753
+ }
1754
+
1755
+ static void test_ir_get_doc(TestCase *tc, void *data)
1756
+ {
1757
+ FrtIndexReader *ir = (FrtIndexReader *)data;
1758
+ FrtDocument *doc = ir->get_doc(ir, 3);
1759
+ FrtDocField *df;
1760
+ Aiequal(4, doc->size);
1761
+
1762
+ df = frt_doc_get_field(doc, author);
1763
+ Asequal(rb_id2name(author), rb_id2name(df->name));
1764
+ Asequal("Leo Tolstoy", df->data[0]);
1765
+ Afequal(df->boost, 1.0);
1766
+
1767
+ df = frt_doc_get_field(doc, body);
1768
+ Asequal(rb_id2name(body), rb_id2name(df->name));
1769
+ Asequal("word3 word4 word1 word2 word1 "
1770
+ "word3 word4 word1 word3 word3", df->data[0]);
1771
+ Afequal(df->boost, 1.0);
1772
+ df = frt_doc_get_field(doc, title);
1773
+ Asequal(rb_id2name(title), rb_id2name(df->name));
1774
+ Asequal("War And Peace", df->data[0]);
1775
+ Afequal(df->boost, 1.0);
1776
+
1777
+ df = frt_doc_get_field(doc, year);
1778
+ Asequal(rb_id2name(year), rb_id2name(df->name));
1779
+ Asequal("1865", df->data[0]);
1780
+ Afequal(df->boost, 1.0);
1781
+
1782
+ df = frt_doc_get_field(doc, text);
1783
+ Apnull(df); /* text is not stored */
1784
+
1785
+ frt_doc_destroy(doc);
1786
+ }
1787
+
1788
+ static void test_ir_compression(TestCase *tc, void *data)
1789
+ {
1790
+ int i;
1791
+ FrtIndexReader *ir = (FrtIndexReader *)data;
1792
+ FrtLazyDoc *lz_doc;
1793
+ FrtLazyDocField *lz_df1, *lz_df2;
1794
+ FrtDocument *doc = ir->get_doc(ir, 0);
1795
+ FrtDocField *df1, *df2;
1796
+ char buf1[20], buf2[20];
1797
+ Aiequal(3, doc->size);
1798
+
1799
+ df1 = frt_doc_get_field(doc, changing_field);
1800
+ df2 = frt_doc_get_field(doc, compressed_field);
1801
+ Asequal(df1->data[0], df2->data[0]);
1802
+ Assert(df1->lengths[0] == df2->lengths[0], "Field lengths should be equal");
1803
+ frt_doc_destroy(doc);
1804
+
1805
+ doc = ir->get_doc(ir, 2);
1806
+ df1 = frt_doc_get_field(doc, tag);
1807
+ df2 = frt_doc_get_field(doc, compressed_field);
1808
+ for (i = 0; i < 4; i++) {
1809
+ Asequal(df1->data[i], df2->data[i]);
1810
+ Assert(df1->lengths[i] == df2->lengths[i], "Field lengths not equal");
1811
+ }
1812
+ frt_doc_destroy(doc);
1813
+
1814
+ lz_doc = ir->get_lazy_doc(ir, 0);
1815
+ lz_df1 = frt_lazy_doc_get(lz_doc, changing_field);
1816
+ lz_df2 = frt_lazy_doc_get(lz_doc, compressed_field);
1817
+ Asequal(frt_lazy_df_get_data(lz_df1, 0), frt_lazy_df_get_data(lz_df2, 0));
1818
+ frt_lazy_doc_close(lz_doc);
1819
+
1820
+ lz_doc = ir->get_lazy_doc(ir, 2);
1821
+ lz_df1 = frt_lazy_doc_get(lz_doc, tag);
1822
+ lz_df2 = frt_lazy_doc_get(lz_doc, compressed_field);
1823
+ for (i = 0; i < 4; i++) {
1824
+ Asequal(frt_lazy_df_get_data(lz_df1, i), frt_lazy_df_get_data(lz_df2, i));
1825
+ }
1826
+ frt_lazy_doc_close(lz_doc);
1827
+
1828
+ lz_doc = ir->get_lazy_doc(ir, 2);
1829
+ lz_df1 = frt_lazy_doc_get(lz_doc, tag);
1830
+ lz_df2 = frt_lazy_doc_get(lz_doc, compressed_field);
1831
+ frt_lazy_df_get_bytes(lz_df1, buf1, 5, 11);
1832
+ frt_lazy_df_get_bytes(lz_df2, buf2, 5, 11);
1833
+ buf2[11] = buf1[11] = '\0';
1834
+ Asequal(buf1, buf2);
1835
+ frt_lazy_doc_close(lz_doc);
1836
+ }
1837
+
1838
+ static void test_ir_mtdpe(TestCase *tc, void *data)
1839
+ {
1840
+ FrtIndexReader *ir = (FrtIndexReader *)data;
1841
+ const char *terms[3] = {"Where", "is", "books."};
1842
+
1843
+ FrtTermDocEnum *tde = frt_mtdpe_new(ir, frt_fis_get_field(ir->fis, body)->number, (char **)terms, 3);
1844
+
1845
+ Atrue(tde->next(tde));
1846
+ Aiequal(0, tde->doc_num(tde));
1847
+ Aiequal(2, tde->freq(tde));
1848
+ Aiequal(0, tde->next_position(tde));
1849
+ Aiequal(1, tde->next_position(tde));
1850
+ Atrue(tde->next(tde));
1851
+ Aiequal(20, tde->doc_num(tde));
1852
+ Aiequal(2, tde->freq(tde));
1853
+ Aiequal(1, tde->next_position(tde));
1854
+ Aiequal(17, tde->next_position(tde));
1855
+ Atrue(!tde->next(tde));
1856
+ tde->close(tde);
1857
+ }
1858
+
1859
+ static void test_ir_norms(TestCase *tc, void *data)
1860
+ {
1861
+ int i;
1862
+ frt_uchar *norms;
1863
+ FrtIndexReader *ir, *ir2;
1864
+ FrtIndexWriter *iw;
1865
+ int type = *((int *)data);
1866
+ ReaderTestEnvironment *rte;
1867
+
1868
+ rte = reader_test_env_new(type);
1869
+ ir = reader_test_env_ir_open(rte);
1870
+ ir2 = reader_test_env_ir_open(rte);
1871
+ Atrue(!frt_index_is_locked(rte->stores[0]));
1872
+
1873
+ frt_ir_set_norm(ir, 3, title, 1);
1874
+ Atrue(frt_index_is_locked(rte->stores[0]));
1875
+ frt_ir_set_norm(ir, 3, body, 12);
1876
+ frt_ir_set_norm(ir, 3, author, 145);
1877
+ frt_ir_set_norm(ir, 3, year, 31);
1878
+ frt_ir_set_norm(ir, 5, text, 202);
1879
+ frt_ir_set_norm(ir, 25, text, 20);
1880
+ frt_ir_set_norm(ir, 50, text, 200);
1881
+ frt_ir_set_norm(ir, 75, text, 155);
1882
+ frt_ir_set_norm(ir, 80, text, 0);
1883
+ frt_ir_set_norm(ir, 150, text, 255);
1884
+ frt_ir_set_norm(ir, 255, text, 76);
1885
+
1886
+ frt_ir_commit(ir);
1887
+ Atrue(!frt_index_is_locked(rte->stores[0]));
1888
+
1889
+ norms = frt_ir_get_norms(ir, text);
1890
+
1891
+ Aiequal(202, norms[5]);
1892
+ Aiequal(20, norms[25]);
1893
+ Aiequal(200, norms[50]);
1894
+ Aiequal(155, norms[75]);
1895
+ Aiequal(0, norms[80]);
1896
+ Aiequal(255, norms[150]);
1897
+ Aiequal(76, norms[255]);
1898
+
1899
+ norms = frt_ir_get_norms(ir, title);
1900
+ Aiequal(1, norms[3]);
1901
+
1902
+ norms = frt_ir_get_norms(ir, body);
1903
+ Aiequal(12, norms[3]);
1904
+
1905
+ norms = frt_ir_get_norms(ir, author);
1906
+ Aiequal(145, norms[3]);
1907
+
1908
+ norms = frt_ir_get_norms(ir, year);
1909
+ /* Apnull(norms); */
1910
+
1911
+ norms = FRT_ALLOC_N(frt_uchar, 356);
1912
+ frt_ir_get_norms_into(ir, text, norms + 100);
1913
+ Aiequal(202, norms[105]);
1914
+ Aiequal(20, norms[125]);
1915
+ Aiequal(200, norms[150]);
1916
+ Aiequal(155, norms[175]);
1917
+ Aiequal(0, norms[180]);
1918
+ Aiequal(255, norms[250]);
1919
+ Aiequal(76, norms[355]);
1920
+
1921
+ frt_ir_commit(ir);
1922
+
1923
+ for (i = 0; i < rte->store_cnt; i++) {
1924
+ iw = frt_iw_open(rte->stores[i], frt_whitespace_analyzer_new(false),
1925
+ &frt_default_config);
1926
+ frt_iw_optimize(iw);
1927
+ frt_iw_close(iw);
1928
+ }
1929
+
1930
+ frt_ir_close(ir);
1931
+
1932
+ ir = reader_test_env_ir_open(rte);
1933
+
1934
+ memset(norms, 0, 356);
1935
+ frt_ir_get_norms_into(ir, text, norms + 100);
1936
+ Aiequal(0, norms[102]);
1937
+ Aiequal(202, norms[105]);
1938
+ Aiequal(0, norms[104]);
1939
+ Aiequal(20, norms[125]);
1940
+ Aiequal(200, norms[150]);
1941
+ Aiequal(155, norms[175]);
1942
+ Aiequal(0, norms[180]);
1943
+ Aiequal(255, norms[250]);
1944
+ Aiequal(76, norms[355]);
1945
+
1946
+ Atrue(!frt_index_is_locked(rte->stores[0]));
1947
+ frt_ir_set_norm(ir, 0, text, 155);
1948
+ Atrue(frt_index_is_locked(rte->stores[0]));
1949
+ frt_ir_close(ir);
1950
+ frt_ir_close(ir2);
1951
+ Atrue(!frt_index_is_locked(rte->stores[0]));
1952
+ reader_test_env_destroy(rte);
1953
+ free(norms);
1954
+ }
1955
+
1956
+ static void test_ir_delete(TestCase *tc, void *data)
1957
+ {
1958
+ int i;
1959
+ FrtStore *store = frt_open_ram_store();
1960
+ FrtIndexReader *ir, *ir2;
1961
+ FrtIndexWriter *iw;
1962
+ int type = *((int *)data);
1963
+ ReaderTestEnvironment *rte;
1964
+
1965
+ rte = reader_test_env_new(type);
1966
+ ir = reader_test_env_ir_open(rte);
1967
+ ir2 = reader_test_env_ir_open(rte);
1968
+
1969
+ Aiequal(false, ir->has_deletions(ir));
1970
+ Aiequal(IR_TEST_DOC_CNT, ir->max_doc(ir));
1971
+ Aiequal(IR_TEST_DOC_CNT, ir->num_docs(ir));
1972
+ Aiequal(false, ir->is_deleted(ir, 10));
1973
+
1974
+ frt_ir_delete_doc(ir, 10);
1975
+ Aiequal(true, ir->has_deletions(ir));
1976
+ Aiequal(IR_TEST_DOC_CNT, ir->max_doc(ir));
1977
+ Aiequal(IR_TEST_DOC_CNT - 1, ir->num_docs(ir));
1978
+ Aiequal(true, ir->is_deleted(ir, 10));
1979
+
1980
+ frt_ir_delete_doc(ir, 10);
1981
+ Aiequal(true, ir->has_deletions(ir));
1982
+ Aiequal(IR_TEST_DOC_CNT, ir->max_doc(ir));
1983
+ Aiequal(IR_TEST_DOC_CNT - 1, ir->num_docs(ir));
1984
+ Aiequal(true, ir->is_deleted(ir, 10));
1985
+
1986
+ frt_ir_delete_doc(ir, IR_TEST_DOC_CNT - 1);
1987
+ Aiequal(true, ir->has_deletions(ir));
1988
+ Aiequal(IR_TEST_DOC_CNT, ir->max_doc(ir));
1989
+ Aiequal(IR_TEST_DOC_CNT - 2, ir->num_docs(ir));
1990
+ Aiequal(true, ir->is_deleted(ir, IR_TEST_DOC_CNT - 1));
1991
+
1992
+ frt_ir_delete_doc(ir, IR_TEST_DOC_CNT - 2);
1993
+ Aiequal(true, ir->has_deletions(ir));
1994
+ Aiequal(IR_TEST_DOC_CNT, ir->max_doc(ir));
1995
+ Aiequal(IR_TEST_DOC_CNT - 3, ir->num_docs(ir));
1996
+ Aiequal(true, ir->is_deleted(ir, IR_TEST_DOC_CNT - 2));
1997
+
1998
+ frt_ir_undelete_all(ir);
1999
+ Aiequal(false, ir->has_deletions(ir));
2000
+ Aiequal(IR_TEST_DOC_CNT, ir->max_doc(ir));
2001
+ Aiequal(IR_TEST_DOC_CNT, ir->num_docs(ir));
2002
+ Aiequal(false, ir->is_deleted(ir, 10));
2003
+ Aiequal(false, ir->is_deleted(ir, IR_TEST_DOC_CNT - 2));
2004
+ Aiequal(false, ir->is_deleted(ir, IR_TEST_DOC_CNT - 1));
2005
+
2006
+ frt_ir_delete_doc(ir, 10);
2007
+ frt_ir_delete_doc(ir, 20);
2008
+ frt_ir_delete_doc(ir, 30);
2009
+ frt_ir_delete_doc(ir, 40);
2010
+ frt_ir_delete_doc(ir, 50);
2011
+ frt_ir_delete_doc(ir, IR_TEST_DOC_CNT - 1);
2012
+ Aiequal(true, ir->has_deletions(ir));
2013
+ Aiequal(IR_TEST_DOC_CNT, ir->max_doc(ir));
2014
+ Aiequal(IR_TEST_DOC_CNT - 6, ir->num_docs(ir));
2015
+
2016
+ frt_ir_close(ir);
2017
+
2018
+ ir = reader_test_env_ir_open(rte);
2019
+
2020
+ Aiequal(true, ir->has_deletions(ir));
2021
+ Aiequal(IR_TEST_DOC_CNT, ir->max_doc(ir));
2022
+ Aiequal(IR_TEST_DOC_CNT - 6, ir->num_docs(ir));
2023
+ Aiequal(true, ir->is_deleted(ir, 10));
2024
+ Aiequal(true, ir->is_deleted(ir, 20));
2025
+ Aiequal(true, ir->is_deleted(ir, 30));
2026
+ Aiequal(true, ir->is_deleted(ir, 40));
2027
+ Aiequal(true, ir->is_deleted(ir, 50));
2028
+ Aiequal(true, ir->is_deleted(ir, IR_TEST_DOC_CNT - 1));
2029
+
2030
+ frt_ir_undelete_all(ir);
2031
+ Aiequal(false, ir->has_deletions(ir));
2032
+ Aiequal(IR_TEST_DOC_CNT, ir->max_doc(ir));
2033
+ Aiequal(IR_TEST_DOC_CNT, ir->num_docs(ir));
2034
+ Aiequal(false, ir->is_deleted(ir, 10));
2035
+ Aiequal(false, ir->is_deleted(ir, 20));
2036
+ Aiequal(false, ir->is_deleted(ir, 30));
2037
+ Aiequal(false, ir->is_deleted(ir, 40));
2038
+ Aiequal(false, ir->is_deleted(ir, 50));
2039
+ Aiequal(false, ir->is_deleted(ir, IR_TEST_DOC_CNT - 1));
2040
+
2041
+ frt_ir_delete_doc(ir, 10);
2042
+ frt_ir_delete_doc(ir, 20);
2043
+ frt_ir_delete_doc(ir, 30);
2044
+ frt_ir_delete_doc(ir, 40);
2045
+ frt_ir_delete_doc(ir, 50);
2046
+ frt_ir_delete_doc(ir, IR_TEST_DOC_CNT - 1);
2047
+
2048
+ frt_ir_commit(ir);
2049
+
2050
+ for (i = 0; i < rte->store_cnt; i++) {
2051
+ iw = frt_iw_open(rte->stores[i], frt_whitespace_analyzer_new(false),
2052
+ &frt_default_config);
2053
+ frt_iw_optimize(iw);
2054
+ frt_iw_close(iw);
2055
+ }
2056
+
2057
+ frt_ir_close(ir);
2058
+ ir = reader_test_env_ir_open(rte);
2059
+
2060
+ Aiequal(false, ir->has_deletions(ir));
2061
+ Aiequal(IR_TEST_DOC_CNT - 6, ir->max_doc(ir));
2062
+ Aiequal(IR_TEST_DOC_CNT - 6, ir->num_docs(ir));
2063
+
2064
+ Atrue(frt_ir_is_latest(ir));
2065
+ Atrue(!frt_ir_is_latest(ir2));
2066
+
2067
+ frt_ir_close(ir);
2068
+ frt_ir_close(ir2);
2069
+ reader_test_env_destroy(rte);
2070
+ frt_store_deref(store);
2071
+ }
2072
+
2073
+ static void test_ir_read_while_optimizing(TestCase *tc, void *data)
2074
+ {
2075
+ FrtStore *store = (FrtStore *)data;
2076
+ FrtIndexReader *ir;
2077
+ FrtIndexWriter *iw;
2078
+
2079
+ write_ir_test_docs(store);
2080
+
2081
+ ir = frt_ir_open(store);
2082
+
2083
+ test_ir_term_doc_enum(tc, ir);
2084
+
2085
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), false);
2086
+ frt_iw_optimize(iw);
2087
+ frt_iw_close(iw);
2088
+
2089
+ test_ir_term_doc_enum(tc, ir);
2090
+
2091
+ frt_ir_close(ir);
2092
+ }
2093
+
2094
+ static void test_ir_multivalue_fields(TestCase *tc, void *data)
2095
+ {
2096
+ FrtStore *store = (FrtStore *)data;
2097
+ FrtIndexReader *ir;
2098
+ FrtFieldInfo *fi;
2099
+ FrtDocument *doc = frt_doc_new();
2100
+ FrtDocField *df;
2101
+ FrtIndexWriter *iw;
2102
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES,
2103
+ FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
2104
+ const char *body_text = "this is the body FrtDocument Field";
2105
+ const char *title_text = "this is the title FrtDocument Field";
2106
+ const char *author_text = "this is the author FrtDocument Field";
2107
+
2108
+ frt_index_create(store, fis);
2109
+ frt_fis_deref(fis);
2110
+ iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), NULL);
2111
+
2112
+ df = frt_doc_add_field(doc, frt_df_add_data(frt_df_new(tag), (char *)"Ruby"));
2113
+ frt_df_add_data(df, (char *)"C");
2114
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(body), (char *)body_text));
2115
+ frt_df_add_data(df, (char *)"Lucene");
2116
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(title), (char *)title_text));
2117
+ frt_df_add_data(df, (char *)"Ferret");
2118
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(author), (char *)author_text));
2119
+
2120
+ Aiequal(0, iw->fis->size);
2121
+
2122
+ frt_iw_add_doc(iw, doc);
2123
+
2124
+ fi = frt_fis_get_field(iw->fis, tag);
2125
+ Aiequal(true, fi_is_stored(fi));
2126
+ Aiequal(true, fi_is_indexed(fi));
2127
+ Aiequal(true, fi_is_tokenized(fi));
2128
+ Aiequal(true, fi_has_norms(fi));
2129
+ Aiequal(true, fi_store_term_vector(fi));
2130
+ Aiequal(true, fi_store_offsets(fi));
2131
+ Aiequal(true, fi_store_positions(fi));
2132
+
2133
+ frt_doc_destroy(doc);
2134
+ frt_iw_close(iw);
2135
+
2136
+ ir = frt_ir_open(store);
2137
+
2138
+ doc = ir->get_doc(ir, 0);
2139
+ Aiequal(4, doc->size);
2140
+ df = frt_doc_get_field(doc, tag);
2141
+ Aiequal(4, df->size);
2142
+ Asequal("Ruby", df->data[0]);
2143
+ Asequal("C", df->data[1]);
2144
+ Asequal("Lucene", df->data[2]);
2145
+ Asequal("Ferret", df->data[3]);
2146
+
2147
+ df = frt_doc_get_field(doc, body);
2148
+ Aiequal(1, df->size);
2149
+ Asequal(body_text, df->data[0]);
2150
+
2151
+ df = frt_doc_get_field(doc, title);
2152
+ Aiequal(1, df->size);
2153
+ Asequal(title_text, df->data[0]);
2154
+
2155
+ df = frt_doc_get_field(doc, author);
2156
+ Aiequal(1, df->size);
2157
+ Asequal(author_text, df->data[0]);
2158
+
2159
+ frt_doc_destroy(doc);
2160
+ frt_ir_delete_doc(ir, 0);
2161
+ frt_ir_close(ir);
2162
+ }
2163
+
2164
+ /***************************************************************************
2165
+ *
2166
+ * IndexSuite
2167
+ *
2168
+ ***************************************************************************/
2169
+ TestSuite *ts_index(TestSuite *suite)
2170
+ {
2171
+ FrtIndexReader *ir;
2172
+ FrtStore *fs_store, *store = frt_open_ram_store();
2173
+ ReaderTestEnvironment *rte = NULL;
2174
+ /* FrtStore *store = frt_open_fs_store(TEST_DIR); */
2175
+
2176
+ /* initialize FrtSymbols */
2177
+ body = rb_intern("body");
2178
+ title = rb_intern("title");
2179
+ text = rb_intern("text");
2180
+ author = rb_intern("author");
2181
+ year = rb_intern("year");
2182
+ changing_field = rb_intern("changing_field");
2183
+ compressed_field = rb_intern("compressed_field");
2184
+ tag = rb_intern("tag");
2185
+
2186
+ srand(5);
2187
+ suite = tst_add_suite(suite, "test_term_doc_enum");
2188
+
2189
+ /* FrtTermDocEnum */
2190
+ tst_run_test(suite, test_segment_term_doc_enum, store);
2191
+ tst_run_test(suite, test_segment_tde_deleted_docs, store);
2192
+
2193
+ suite = ADD_SUITE(suite);
2194
+ /* Index */
2195
+ tst_run_test(suite, test_index_create, store);
2196
+ tst_run_test(suite, test_index_version, store);
2197
+ tst_run_test(suite, test_index_undelete_all_after_close, store);
2198
+
2199
+ /* FrtIndexWriter */
2200
+ tst_run_test(suite, test_fld_inverter, store);
2201
+ tst_run_test(suite, test_postings_sorter, NULL);
2202
+ tst_run_test(suite, test_iw_add_doc, store);
2203
+ tst_run_test(suite, test_iw_add_docs, store);
2204
+ tst_run_test(suite, test_iw_add_empty_tv, store);
2205
+ tst_run_test(suite, test_iw_del_terms, store);
2206
+ tst_run_test(suite, test_create_with_reader, store);
2207
+ tst_run_test(suite, test_simulated_crashed_writer, store);
2208
+ tst_run_test(suite, test_simulated_corrupt_index1, store);
2209
+ tst_run_test(suite, test_simulated_corrupt_index2, store);
2210
+
2211
+ /* FrtIndexReader */
2212
+ tst_run_test(suite, test_ir_open_empty_index, store);
2213
+
2214
+ /* Test SEGMENT Reader */
2215
+ rte = reader_test_env_new(segment_reader_type);
2216
+ ir = reader_test_env_ir_open(rte);
2217
+ tst_run_test_with_name(suite, test_ir_basic_ops, ir, "test_segment_reader_basic_ops");
2218
+ tst_run_test_with_name(suite, test_ir_get_doc, ir, "test_segment_get_doc");
2219
+ tst_run_test_with_name(suite, test_ir_compression, ir, "test_segment_compression");
2220
+ tst_run_test_with_name(suite, test_ir_term_enum, ir, "test_segment_term_enum");
2221
+ tst_run_test_with_name(suite, test_ir_term_doc_enum, ir, "test_segment_term_doc_enum");
2222
+ tst_run_test_with_name(suite, test_ir_term_vectors, ir, "test_segment_term_vectors");
2223
+ tst_run_test_with_name(suite, test_ir_mtdpe, ir, "test_segment_multiple_term_doc_pos_enum");
2224
+ tst_run_test_with_name(suite, test_ir_norms, &segment_reader_type, "test_segment_norms");
2225
+ tst_run_test_with_name(suite, test_ir_delete, &segment_reader_type, "test_segment_reader_delete");
2226
+ frt_ir_close(ir);
2227
+ reader_test_env_destroy(rte);
2228
+
2229
+ /* Test MULTI Reader */
2230
+ rte = reader_test_env_new(multi_reader_type);
2231
+ ir = reader_test_env_ir_open(rte);
2232
+
2233
+ tst_run_test_with_name(suite, test_ir_basic_ops, ir,
2234
+ "test_multi_reader_basic_ops");
2235
+ tst_run_test_with_name(suite, test_ir_get_doc, ir,
2236
+ "test_multi_get_doc");
2237
+ tst_run_test_with_name(suite, test_ir_compression, ir,
2238
+ "test_multi_compression");
2239
+ tst_run_test_with_name(suite, test_ir_term_enum, ir,
2240
+ "test_multi_term_enum");
2241
+ tst_run_test_with_name(suite, test_ir_term_doc_enum, ir,
2242
+ "test_multi_term_doc_enum");
2243
+ tst_run_test_with_name(suite, test_ir_term_vectors, ir,
2244
+ "test_multi_term_vectors");
2245
+ tst_run_test_with_name(suite, test_ir_mtdpe, ir,
2246
+ "test_multi_multiple_term_doc_pos_enum");
2247
+
2248
+ tst_run_test_with_name(suite, test_ir_norms, &multi_reader_type,
2249
+ "test_multi_norms");
2250
+ tst_run_test_with_name(suite, test_ir_delete, &multi_reader_type,
2251
+ "test_multi_reader_delete");
2252
+ frt_ir_close(ir);
2253
+ reader_test_env_destroy(rte);
2254
+
2255
+ /* Test MULTI Reader with seperate stores */
2256
+ rte = reader_test_env_new(multi_external_reader_type);
2257
+ ir = reader_test_env_ir_open(rte);
2258
+
2259
+ tst_run_test_with_name(suite, test_ir_basic_ops, ir,
2260
+ "test_multi_ext_reader_basic_ops");
2261
+ tst_run_test_with_name(suite, test_ir_get_doc, ir,
2262
+ "test_multi_ext_get_doc");
2263
+ tst_run_test_with_name(suite, test_ir_compression, ir,
2264
+ "test_multi_ext_compression");
2265
+ tst_run_test_with_name(suite, test_ir_term_enum, ir,
2266
+ "test_multi_ext_term_enum");
2267
+ tst_run_test_with_name(suite, test_ir_term_doc_enum, ir,
2268
+ "test_multi_ext_term_doc_enum");
2269
+ tst_run_test_with_name(suite, test_ir_term_vectors, ir,
2270
+ "test_multi_ext_term_vectors");
2271
+ tst_run_test_with_name(suite, test_ir_mtdpe, ir,
2272
+ "test_multi_ext_multiple_term_doc_pos_enum");
2273
+
2274
+ tst_run_test_with_name(suite, test_ir_norms, &multi_external_reader_type,
2275
+ "test_multi_ext_norms");
2276
+ tst_run_test_with_name(suite, test_ir_delete, &multi_external_reader_type,
2277
+ "test_multi_ext_reader_delete");
2278
+
2279
+ frt_ir_close(ir);
2280
+ reader_test_env_destroy(rte);
2281
+
2282
+ /* Test Add Indexes */
2283
+ rte = reader_test_env_new(add_indexes_reader_type);
2284
+ ir = reader_test_env_ir_open(rte);
2285
+
2286
+ tst_run_test_with_name(suite, test_ir_basic_ops, ir,
2287
+ "test_add_indexes_reader_basic_ops");
2288
+ tst_run_test_with_name(suite, test_ir_get_doc, ir,
2289
+ "test_add_indexes_get_doc");
2290
+ tst_run_test_with_name(suite, test_ir_compression, ir,
2291
+ "test_add_indexes_compression");
2292
+ tst_run_test_with_name(suite, test_ir_term_enum, ir,
2293
+ "test_add_indexes_term_enum");
2294
+ tst_run_test_with_name(suite, test_ir_term_doc_enum, ir,
2295
+ "test_add_indexes_term_doc_enum");
2296
+ tst_run_test_with_name(suite, test_ir_term_vectors, ir,
2297
+ "test_add_indexes_term_vectors");
2298
+ tst_run_test_with_name(suite, test_ir_mtdpe, ir,
2299
+ "test_add_indexes_multiple_term_doc_pos_enum");
2300
+
2301
+ tst_run_test_with_name(suite, test_ir_norms, &add_indexes_reader_type,
2302
+ "test_add_indexes_norms");
2303
+ tst_run_test_with_name(suite, test_ir_delete, &add_indexes_reader_type,
2304
+ "test_add_indexes_reader_delete");
2305
+
2306
+ frt_ir_close(ir);
2307
+ reader_test_env_destroy(rte);
2308
+
2309
+ /* Other FrtIndexReader Tests */
2310
+ tst_run_test_with_name(suite, test_ir_read_while_optimizing, store,
2311
+ "test_ir_read_while_optimizing_in_ram");
2312
+
2313
+ fs_store = frt_open_fs_store(TEST_DIR);
2314
+ tst_run_test_with_name(suite, test_ir_read_while_optimizing, fs_store,
2315
+ "test_ir_read_while_optimizing_on_disk");
2316
+ fs_store->clear_all(fs_store);
2317
+ frt_store_deref(fs_store);
2318
+
2319
+ tst_run_test(suite, test_ir_multivalue_fields, store);
2320
+
2321
+ frt_store_deref(store);
2322
+ return suite;
2323
+ }