ferret 0.9.6 → 0.10.0

Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
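The file list above tells the story of this release: the pure-Ruby implementation under data/lib/ferret/ (analysis, document, index, search, store, utils) is deleted wholesale and replaced by a C extension under data/ext/, and the Document/Field classes disappear in favour of plain Ruby Hashes, Strings and Arrays. A minimal sketch of the resulting 0.10-style usage, pieced together from the test diffs below; treat anything not shown in those diffs as illustrative:

    require 'ferret'

    # Documents are plain Ruby Hashes (or Strings/Arrays) in 0.10;
    # Ferret::Document::Document and Ferret::Document::Field are gone.
    index = Ferret::Index::Index.new            # a RAM index when no :path is given
    index << {:title => "first doc", :content => "contents of first doc"}
    index.search_each("content:contents") do |doc_id, score|
      puts "doc #{doc_id} scored #{score}"
    end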
data/test/unit/index/tc_index_writer.rb:

@@ -7,6 +7,8 @@ class IndexWriterTest < Test::Unit::TestCase
 
   def setup()
     @dir = Ferret::Store::RAMDirectory.new
+    fis = FieldInfos.new()
+    fis.create_index(@dir)
   end
 
   def tear_down()
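The new setup creates the index up front through FieldInfos rather than leaving it to the first IndexWriter. A sketch of the pattern, using only the calls visible in the hunk above:

    require 'ferret'

    # FieldInfos (here with all-default per-field settings) now owns index
    # creation; an IndexWriter opened later finds the index already in place.
    dir = Ferret::Store::RAMDirectory.new
    fis = Ferret::Index::FieldInfos.new()
    fis.create_index(dir)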
@@ -18,7 +20,7 @@ class IndexWriterTest < Test::Unit::TestCase
     clock = @dir.make_lock(IndexWriter::COMMIT_LOCK_NAME)
     assert(! wlock.locked?)
     assert(! clock.locked?)
-    iw = IndexWriter.new(@dir, :create => true)
+    iw = IndexWriter.new(:dir => @dir, :create => true)
     assert(@dir.exists?("segments"))
     assert(wlock.locked?)
     iw.close()
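This one-line change shows the new constructor convention: in 0.9.6 the store was a positional argument, while in 0.10.0 IndexWriter.new takes a single options hash. A sketch using the option names that appear throughout these test diffs:

    require 'ferret'

    dir = Ferret::Store::RAMDirectory.new
    Ferret::Index::FieldInfos.new.create_index(dir)

    # 0.9.6:  iw = IndexWriter.new(dir, :create => true)   # store was positional
    # 0.10.0: the store moves into the options hash as :dir
    iw = Ferret::Index::IndexWriter.new(:dir => dir,
                                        :analyzer => Ferret::Analysis::StandardAnalyzer.new(),
                                        :create => true)
    iw.close()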
@@ -28,26 +30,408 @@ class IndexWriterTest < Test::Unit::TestCase
   end
 
   def test_add_document
-    iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new(), :create => true)
-    doc = IndexTestHelper.prepare_document()
-    iw.add_document(doc)
+    iw = IndexWriter.new(:dir => @dir,
+                         :analyzer => StandardAnalyzer.new(),
+                         :create => true)
+    iw << {:title => "first doc", :content => ["contents of", "first doc"]}
     assert_equal(1, iw.doc_count)
+    iw << ["contents of", "second doc"]
+    assert_equal(2, iw.doc_count)
+    iw << "contents of third doc"
+    assert_equal(3, iw.doc_count)
     iw.close()
   end
 
-  def test_add_documents
-    iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new(), :create => true)
-    # uncomment the following line to see logging
-    #iw.info_stream = $stdout
+  def test_add_documents_fuzzy
+    iw = IndexWriter.new(:dir => @dir,
+                         :analyzer => StandardAnalyzer.new())
     iw.merge_factor = 3
-    iw.min_merge_docs = 3
-    docs = IndexTestHelper.prepare_book_list()
-    docs.each_with_index do |doc, i|
-      #puts "Index doc " + i.to_s
+    iw.max_buffered_docs = 3
+
+    # add 100 documents
+    100.times do
+      doc = random_doc()
       iw.add_document(doc)
     end
-    assert_equal(37, iw.doc_count)
+    assert_equal(100, iw.doc_count)
     iw.close()
   end
 
+  private
+
+  WORDS = [
+    "desirous", "hollowness's", "camp's", "Senegal", "broadcaster's",
+    "pecking", "Provence", "paternalism", "premonition", "Dumbo's",
+    "Darlene's", "Elbert's", "substrate", "Camille", "Menkalinan", "Cooper",
+    "decamps", "abatement's", "bindings", "scrubby", "subset", "ancestor's",
+    "pelagic", "abscissa", "loofah's", "gleans", "boudoir", "disappointingly",
+    "guardianship's", "settlers", "Mylar", "timetable's", "parabolic",
+    "madams", "bootlegger's", "monotonically", "gage", "Karyn's", "deposed",
+    "boozy", "swordfish's", "Chevron", "Victrola", "Tameka", "impels",
+    "carrels", "salami's", "celibate", "resistance's", "duration",
+    "abscissae", "Kilroy's", "corrosive", "flight's", "flapper", "scare",
+    "peppiest", "Pygmies", "Menzies", "wrist's", "enumerable", "housecoats",
+    "Khwarizmi's", "stampeding", "hungering", "steeping", "Yemenis",
+    "entangles", "solver", "mishapping", "Rand's", "ninety", "Boris",
+    "impedimenta", "predators", "ridge", "wretchedness's", "crapping", "Head",
+    "Edwards", "Claude's", "geodesics", "verities", "botch", "Short's",
+    "vellum's", "coruscates", "hydrogenates", "Haas's", "deceitfulness",
+    "cohort's", "Cepheus", "totes", "Cortez's", "napalm", "fruitcake",
+    "coordinated", "Coulomb", "desperation", "behoves", "contractor's",
+    "vacationed", "Wanamaker's", "leotard", "filtrated", "cringes", "Lugosi",
+    "sheath's", "orb", "jawed", "Isidro", "geophysics", "persons", "Asians",
+    "booze's", "eight's", "backslappers", "hankered", "dos", "helpings",
+    "tough", "interlarding", "gouger", "inflect", "Juneau's", "hay's",
+    "sardining", "spays", "Brandi", "depressant", "space", "assess",
+    "reappearance's", "Eli's", "Cote", "Enoch", "chants", "ruffianing",
+    "moralised", "unsuccessfully", "or", "Maryland's", "mildest", "unsafer",
+    "dutiful", "Pribilof", "teas", "vagued", "microbiologists", "hedgerow",
+    "speller's", "conservators", "catharsis", "drawbacks", "whooshed",
+    "unlawful", "revolve", "craftsmanship", "destabilise", "Margarito",
+    "Asgard's", "spawn's", "Annabel's", "canonicals", "buttermilk",
+    "exaltation's", "pothole", "reprints", "approximately", "homage",
+    "Wassermann's", "Atlantic's", "exacerbated", "Huerta", "keypunching",
+    "engagements", "dilate", "ponchos", "Helvetius", "Krakatoa", "basket's",
+    "stepmother", "schlock's", "drippings", "cardiology's", "northwesterly",
+    "cruddier", "poesies", "rustproof", "climb", "miscalled", "Belgians",
+    "Iago", "brownout", "nurseries", "hooliganism's", "concourse's",
+    "advocate", "sunrise's", "hyper", "octopus's", "erecting",
+    "counterattacking", "redesign", "studies", "nitrating", "milestone",
+    "bawls", "Nereid", "inferring", "Ontario's", "annexed", "treasury",
+    "cosmogony's", "scandalised", "shindig's", "detention's",
+    "Lollobrigida's", "eradicating", "magpie", "supertankers", "Adventist's",
+    "dozes", "Artaxerxes", "accumulate", "dankest", "telephony", "flows",
+    "Srivijaya's", "fourteen's", "antonym", "rancid", "briefing's",
+    "theologian", "Jacuzzi", "gracing", "chameleon's", "Brittney's",
+    "Pullmans", "Robitussin's", "jitterier", "mayonnaise's", "fort",
+    "closeouts", "amatory", "Drew's", "cockfight", "pyre", "Laura's",
+    "Bradley's", "obstructionists", "interventions", "tenderness's",
+    "loadstones", "castigation's", "undercut", "volubly", "meditated",
+    "Ypsilanti", "Jannie's", "tams", "drummer's", "inaugurations", "mawing",
+    "Anglophile", "Sherpa", "footholds", "Gonzalo", "removers",
+    "customisation", "procurement's", "allured", "grimaced", "captaining",
+    "liberates", "grandeur's", "Windsor", "screwdrivers", "Flynn's",
+    "extortionists", "carnivorous", "thinned", "panhandlers", "trust's",
+    "bemoaned", "untwisted", "cantors", "rectifies", "speculation",
+    "niacin's", "soppy", "condom", "halberd", "Leadbelly", "vocation's",
+    "tanners", "chanticleer", "secretariats", "Ecuador's", "suppurated",
+    "users", "slag's", "atrocity's", "pillar", "sleeveless", "bulldozers",
+    "turners", "hemline", "astounded", "rosaries", "Mallarmé", "crucifies",
+    "Maidenform", "contribution", "evolve", "chemicals", "uteri",
+    "expostulation", "roamers", "daiquiris", "arraignment", "ribs", "King's",
+    "Persepolis", "arsenic's", "blindfolds", "bloodsucker's", "restocks",
+    "falconry", "Olympia's", "Colosseum's", "vigils", "Louie's",
+    "unwillingly", "sealed", "potatoes", "Argentine", "audit's", "outworn",
+    "boggles", "likely", "alleging", "Tinkerbell", "redistribution's",
+    "Normandy", "Cortes", "porter's", "buntings", "cornucopias", "rosewoods",
+    "shelf's", "airdrops", "summits", "Rosalyn", "redecorating", "twirlers",
+    "monsters", "directed", "semiautomatics", "Foch", "Hobart", "mutilates",
+    "Wilma's", "ornamenting", "Clifford's", "pyromania", "Strasbourg",
+    "bleeders", "additions", "super", "effortlessly", "piecing", "vacations",
+    "gybes", "warranted", "Ting", "her", "histrionic", "marshaled", "spore's",
+    "villainy's", "brat", "confusion", "amphitheatre's", "adjourns",
+    "guzzled", "Visayans", "rogue's", "morsels", "candlestick", "flaks",
+    "Waterbury", "pulp's", "endorser's", "postdoc", "coffining", "swallowing",
+    "Wrangell", "Marcie's", "Marley", "untapped", "fear's", "Kant",
+    "pursuit's", "normally", "jackals", "orals", "Paramaribo's", "Marilyn's",
+    "Diem's", "narrower", "medicinally", "chickweed's", "pretentiousness",
+    "Lardner", "baritone's", "purrs", "Pam's", "pestles", "Philip's",
+    "Titania", "eccentrics", "Albion's", "greed's", "raggediest",
+    "importations", "Truman", "incentives", "typified", "incurred",
+    "bandstands", "Minnie's", "pleasant", "Sandy's", "perplexities",
+    "crease's", "obliques", "backstop", "Nair's", "perusing", "Quixote's",
+    "sicknesses", "vapour's", "butte", "lariats", "disfavours", "McGuffey",
+    "paediatric", "filtered", "whiff's", "gunboats", "devolved",
+    "extravaganza's", "organism", "giggling", "citadel's", "counterbalances",
+    "executrixes", "Cathay", "marshmallow's", "iniquitous", "Katmai", "Siva",
+    "welled", "impertinence's", "plunger", "rice", "forgers", "Larousse",
+    "pollution's", "medium", "residue's", "rumbas", "Odis", "arrogant",
+    "Jasper's", "panged", "doubted", "vistaing", "decibel's", "modulus's",
+    "chickpea's", "mugger's", "potentates", "sequesters", "academy's",
+    "Turk's", "pharmacology's", "defogger", "clomp", "soulless", "elastic",
+    "la's", "shards", "unfortunate", "counterclaim's", "objections", "towel",
+    "converged", "z", "ionisation", "stirrups", "antiquarians", "constructor",
+    "virtuosity's", "Göteborg", "centigramme's", "translators", "dalliance's",
+    "us", "bullfight", "drawer's", "nonconformist", "handcrafts", "Magritte",
+    "tulle", "plant's", "routine", "colour's", "latency's", "repertoire's",
+    "photocopies", "catalyse", "ashrams", "lagging", "flapjack's",
+    "ayatollahs", "decentest", "pitted", "conformity", "jack", "batsman",
+    "electrifies", "Unitarians", "obtain", "medicates", "tumour's",
+    "nutritionally", "haystack", "bustles", "slut", "satirising", "birettas",
+    "starring", "Kubrick's", "flogs", "chequering", "Menkalinan's",
+    "Barbados's", "Bioko", "swinish", "hades", "perjured", "timing's",
+    "cocaine", "ejecting", "rationalises", "dilettante's", "umping",
+    "capsized", "frogmen", "matt", "prostituting", "bola's", "devolution's",
+    "poxing", "Maritza's", "snob's", "scoped", "Costco", "feral", "sirocco",
+    "rebating", "truculence", "junkier", "nabs", "elicit", "allegiance",
+    "care", "arteriosclerosis's", "nonproliferation's", "doxologies",
+    "disconsolate", "bodega", "designers", "Rembrandt", "apostasies",
+    "garrulousness", "Hertzsprung's", "hayseeds", "noncooperation's",
+    "resentment", "cuticles", "sandboxes", "gimmicks", "magnolia",
+    "invalidity's", "pulverised", "Tinkerbell's", "hypoglycemics",
+    "gunboat's", "workbench's", "fleetingly's", "sportsman's", "trots",
+    "decomposes", "discrepancies", "owls", "obscener", "organic", "stoutness",
+    "councillor's", "Philippine's", "Aline", "coarsening", "suffocated",
+    "infighting's", "peculiarity", "roof's", "premier", "sucked", "churl",
+    "remounts", "intends", "wiles", "unfold", "unperturbed", "wainscotings",
+    "restfuller", "ashtray's", "wader's", "decanters", "gild", "tandems",
+    "spooked", "galling", "annuity's", "opacity", "clamour's", "flaccid",
+    "caroming", "savvying", "mammalian's", "toadstool's", "doohickey", "jibs",
+    "conquests", "dishes", "effusively", "distinctions", "curly", "Peckinpah",
+    "whining", "quasar", "sponge", "infrequent", "Novembers", "cowling",
+    "poem's", "muzzles", "Sufi", "authoritarians", "prompts", "Gavin's",
+    "morphology's", "shenanigan", "narrated", "rapprochement", "Heine",
+    "propane's", "addition", "prefect's", "pining", "dwindles",
+    "compulsiveness's", "objectors", "trudging", "segregates", "language",
+    "enthralled", "explosiveness", "toeing", "drainers", "Merrimack's",
+    "smarten", "bigwig's", "embroiders", "Medicaids", "grammar's", "behest's",
+    "chiseled", "equalled", "factual", "Casablanca's", "dams",
+    "disillusioned", "turtleneck", "Baden", "provinces", "bushwhacked", "fey",
+    "Yangtze", "loan's", "decent", "strobe", "challenger's", "hometown",
+    "Neal", "Ernestine's", "magnetises", "minute", "patrol", "Starbucks",
+    "Bernstein", "signal", "interplanetary", "tweak", "archdeacon",
+    "untoward", "transducer", "azaleas", "levied", "worlds", "talks",
+    "Tancred", "hairsplitting's", "edibility's", "confab", "rosetted",
+    "Spanish", "Americanisation", "Charley", "realm's", "incongruities",
+    "chinstraps", "dollhouses", "binocular", "popgun", "physiotherapy's",
+    "knave's", "angelically", "heartbreaking", "clarions", "bespeaks",
+    "pivotal", "Zosma", "ungrammatical", "dilution", "tidily", "Dejesus's",
+    "taller", "pennyweight's", "freshman", "Jamestown", "chiefer", "amen",
+    "attiring", "appurtenance's", "opiates", "mottoes", "towellings", "ashen",
+    "font's", "spoors", "pupil", "groom's", "skimpy", "achieves",
+    "intolerance's", "ardour's", "exorcist", "bottoming", "snag's",
+    "Frenches", "hysteric's", "ladyfinger's", "differences", "seed",
+    "clubfoot's", "glades", "Elton's", "jargon", "Waldo", "grinning",
+    "coherence's", "winos", "turnround", "appended", "Ethelred's", "delete",
+    "steadfastness's", "miss", "thermoplastic", "depraves", "unctuous",
+    "reanimates", "transfusing", "protects", "Babbage's", "foists", "inn",
+    "etched", "sanctimoniously", "idling", "timepiece", "holistic",
+    "waterside", "ulna's", "swindled", "employables", "zebra", "nieces",
+    "pertained", "usages", "vamp's", "Larry's", "cooler's", "holographs",
+    "clewing", "stubborning", "peaked", "underfeeds", "marshmallows",
+    "agreeable", "beards", "Slovenia's", "nitroglycerin", "palls", "impurer",
+    "armours", "stomachaches", "notification's", "Dixieland's", "crozier's",
+    "neurotic", "kudos", "Tania's", "M", "soundtrack's", "territory's",
+    "sped", "house's", "divisibility", "ingress's", "pummelled", "Isabel",
+    "Dewitt", "seemly", "hutched", "calliope", "lengthwise", "flubs",
+    "Moldavia's", "Mercia", "McBride's", "Lenten", "pulverise", "football",
+    "oligarchy", "Max", "scribbler", "acclimatize", "brainwashes",
+    "apprenticed", "benevolences", "two", "Wodehouse", "crew's", "massacre",
+    "proportionals", "Jewishness's", "instep's", "emissary", "folder",
+    "nonentity's", "convinced", "caption", "kangarooed", "dogie",
+    "vagabonding", "auction's", "appraising", "antimony", "part's",
+    "longitude's", "inconsiderateness's", "pawning", "serer", "solos",
+    "histories", "mushy", "parturition", "munched", "oregano", "inanest",
+    "dryness", "kitchenware", "unexpected", "covens", "cheesecakes",
+    "stakeout's", "Pulaski's", "Yoknapatawpha's", "pinhead", "drifted",
+    "guzzler's", "funking", "sou'wester", "oesophagus's", "highbrow",
+    "contralto", "meningitis", "Mazzini", "raggedest", "vaginas", "misfiring",
+    "margaritas", "wedder", "pointed", "slicked", "garlanded", "comeuppances",
+    "vassals", "Sui", "Concord", "bozos", "Garry's", "Maribel's", "epileptic",
+    "Jehoshaphat's", "revolutionary's", "kneecaps", "songbird", "actively",
+    "Meredith", "toddler", "distrusting", "fuchsias", "perusal", "instills",
+    "deathbed", "sunspot's", "spatula's", "Muscovy", "humaniser", "Keats",
+    "regrets", "deflect", "theories", "nonpluses", "populating", "leniency's",
+    "penicillin's", "gaol's", "borough", "moose's", "dogmata",
+    "transcendentally", "supposition's", "nursed", "Gagarin's", "honest",
+    "Chandrasekhar's", "mudslinger's", "parable", "bonged", "Wyeth's",
+    "Ochoa's", "Grenoble", "steamy", "halter's", "rotisserie's", "pagoda's",
+    "wallaby's", "Yank", "pretzel", "rapist's", "estrange", "hectored",
+    "Puebla's", "conniver", "creditor's", "dole's", "Fotomat", "patents",
+    "heckling", "thickener", "etches", "yogi", "hemstitched", "obverses",
+    "Lipizzaner", "divert", "Strong's", "sagest", "Alabama", "He", "Carrie's",
+    "obligation's", "verity's", "outed", "Rhee", "bluffed", "codas",
+    "crèche's", "unpalatable", "dilettanti", "vestment", "purse's",
+    "inflammation's", "bookmarked", "doing's", "whinnying", "impersonators",
+    "Theiler", "scurried", "resistor", "southerners", "Anacreon",
+    "reconstruction's", "footage", "trespassing", "Kafka", "bottling",
+    "stays", "Gretzky", "overburdening", "princesses", "weathercock's",
+    "atolls", "cheerier", "packet", "surrenders", "teacup", "Sabik's",
+    "undecidable", "lollygagged", "pawl's", "anaesthesiology", "sublimely",
+    "contortionists", "motorcades", "Maureen", "lamasery", "yourselves",
+    "Creighton", "poliomyelitis's", "civil", "outmanoeuvre", "lauded",
+    "closeness", "Humboldt's", "pretzels", "ungrudging", "blackguard's",
+    "sickles", "typo", "narcotics", "linesman", "psychotics", "pictured",
+    "deviltry", "Yahtzee", "Lovelace's", "cerebra", "airiness's", "bewitch",
+    "how", "motherland's", "crate's", "Keenan's", "turnstile's",
+    "pedometer's", "carted", "slipping", "fallow", "Canadian", "ladybird's",
+    "thump", "shopper's", "enters", "scowls", "nematode", "focused",
+    "Riley's", "grainiest", "novas", "snuffled", "leftovers", "deify",
+    "Samoan", "pruning", "contenting", "Khachaturian's", "triads",
+    "genealogies", "psalmist", "shaming", "appropriated", "ignominies",
+    "Beadle's", "MHz", "peerages", "facile", "Seoul", "Janna's", "jig's",
+    "mousiness's", "funnier", "delimiter", "watermark", "sheik's", "Reasoner",
+    "ipecac's", "curdles", "wronged", "Segovia's", "solders", "Dunne's",
+    "contractor", "awards", "hostels", "pinkie's", "Herzl", "misplace",
+    "shuttle", "innovative", "vestries", "cosmoses", "trikes", "Casandra's",
+    "hokier", "carouser's", "summerhouses", "renascence", "decomposed",
+    "Balzac's", "outlast", "shod", "squalling", "smugging", "weighing",
+    "omega's", "selects", "fleetingly", "Finland", "petted", "disrespects",
+    "fetter", "confound", "brads", "Bosnia's", "preposition's", "guy's",
+    "different", "tracts", "paediatrics's", "polygon", "eyetooth's", "Aesop",
+    "pentagons", "professions", "homeowner", "looter's", "intimidated",
+    "lustre's", "loneliness", "catnapped", "counties", "pailful",
+    "Christendom's", "Barents", "penis", "Mumford's", "Nigel", "éclairs",
+    "splats", "diabolical", "popularly", "quart", "abjected", "Rasalgethi",
+    "camel's", "inimical", "overweening", "distention's", "Advil", "casement",
+    "seamier", "avaricious", "sierra's", "caparison's", "moldered", "Cortez",
+    "handmaid's", "disappointment", "billowed", "overpopulated", "outsets",
+    "ray", "smoother", "overkill", "somber", "tiller's", "zigzag", "adviser",
+    "absorption's", "sturdily", "hairy", "bloodmobile", "investiture's",
+    "creature", "ripeness's", "Jonathon", "arborvitae's", "skulduggery",
+    "bog", "skeleton's", "Kit's", "Panamas", "Ashlee's", "jazzy", "snit",
+    "divisive", "caribous", "permuting", "frankest", "annotated", "oak's",
+    "meg's", "Gill", "burrito", "dormancy's", "offings", "Nike",
+    "outnumbered", "skater's", "Portugal", "deficit", "Cannon's", "pockmark",
+    "sediment's", "mailbox", "innuendoed", "retire", "wolfhound's",
+    "nicotine's", "brigade's", "mettle's", "softhearted", "hooey's",
+    "abdication", "Orval", "Jaime", "ship", "hyphenations", "sectarians",
+    "Alabaman", "tagging", "ultras", "schizoids", "medicines", "undersized",
+    "Gray", "maternity's", "bandaging", "scooping", "coercion's", "serapes",
+    "celebrate", "Listerine's", "throve", "crypt's", "nearsighted",
+    "metallurgists", "Delicious", "cotton's", "yoked", "cogitates",
+    "underage", "cigarette's", "hallways", "Cointreau", "ma'am", "spacing's",
+    "foresight", "parkway's", "Edwardian", "mediator", "Turner", "Derrida's",
+    "motorist's", "hobo", "equivalences", "sophism", "peeping", "telescoped",
+    "overproduce", "ductility", "Leblanc", "refractory", "passé", "decodes",
+    "womanising", "flax's", "pond's", "infrequency", "talkativeness's",
+    "settlement's", "Prince", "bating", "multimillionaire", "Schultz",
+    "premiss", "quackery", "bathhouse", "Leno's", "Monday's", "Hung's",
+    "undaunted", "bewaring", "tension's", "Chile's", "Rostand's", "platoons",
+    "rodeo's", "Dionne", "Dyson's", "gingivitis's", "fewer",
+    "electromagnetism's", "scrubbier", "ensconced", "wretcheder", "mica's",
+    "expectorant", "snapper's", "chastised", "habitation", "spry", "bathing",
+    "stealth's", "champagnes", "baleful", "fencing's", "threaded", "codicils",
+    "disgraced", "redcaps", "addends", "Olivier", "clasped", "Gwendolyn",
+    "foment", "angularity's", "strenuously", "gorilla", "misbehaved",
+    "surplus's", "newsier", "positioned", "bloodmobiles", "circumstantials",
+    "person's", "varicose", "Calliope", "plethora", "Olmsted",
+    "reconciliation", "Brendan's", "beset", "totters", "sailors",
+    "parliamentarians", "Whitaker", "hilts", "pummelling", "academician's",
+    "ruse", "discreeter", "appetisingly", "perfections", "anus", "overrode",
+    "pedantry's", "possessed", "germs", "unscrews", "expired",
+    "semitrailer's", "Cupid's", "nonsmoker", "Marathon", "secs", "Hopkins",
+    "freeing", "libelled", "furious", "staccatos", "electroencephalogram's",
+    "malingerer's", "impulses", "briars", "Tran", "hilltops", "sulks",
+    "quailed", "fads", "retrenches", "spouted", "outtake", "puncture's",
+    "rats", "kibitzed", "berets", "omnivorous", "flange", "Mons", "glints",
+    "mansards", "thou", "cuing", "suspected", "Kaiser's", "savvier", "skits",
+    "interdict's", "Booker", "Rubinstein", "Tm's", "crossing's", "dewlap",
+    "guarantor's", "edification's", "joyfullest", "crossed", "chowdering",
+    "sillier", "reloading", "commodity's", "bodkins", "conduced", "coughs",
+    "nucleus's", "sixtieth", "proverbially", "comprehensive", "ineluctably",
+    "patrolmen", "resuscitating", "carpetbag's", "Darrin's", "Yeager",
+    "Bataan's", "spoonsful", "proceeds", "wrongdoer", "Karroo", "heart",
+    "poison", "typifying", "endowment's", "aquanauts", "deaconesses",
+    "homosexuality", "Maxine", "haunching", "centred", "Peking's",
+    "toothiest", "growers", "firebombs", "throbs", "Downy", "contribution's",
+    "sago's", "Cole", "Knoxville", "leftmost", "Nell's", "Baffin", "barrings",
+    "contagions", "disencumbers", "countdown", "quintuple", "perihelion",
+    "creationism's", "actioning", "admiralty", "Mt's", "durability's",
+    "sewer's", "replicas", "oxide", "ripened", "Pisces's", "Cinerama's",
+    "catheters", "oppressive", "roosting", "foggiest", "properly", "Kareem",
+    "Ollie", "minuted", "vehicles", "eel", "remunerates", "swashbuckler's",
+    "remunerative", "sanguining", "Belem's", "forlornly", "rudders",
+    "officialdom", "countertenors", "Upton", "whoop", "animations", "arouses",
+    "millionths", "videocassette", "fledgling", "shake", "exterminated",
+    "Cain's", "trendiest", "wariest", "torpedoes", "airmails", "Cameron's",
+    "discord's", "spitefulness's", "thudded", "menaced", "takeovers",
+    "solicited", "wallpapers", "economic", "cache", "rechargeable", "gongs",
+    "droning", "exemption", "Alaskans", "toothed", "snifter", "Stephens",
+    "prejudge", "doctor's", "bobolinks", "rotates", "valuation's", "narrator",
+    "weaning", "uncle", "shelter", "destitution's", "Edgardo's", "gauge",
+    "Nice", "Adolf's", "rheumatics", "inheritances", "undesirables",
+    "Eileen's", "flyweight's", "scope", "possessiveness", "tipsily",
+    "effulgence", "rematch", "Baltic", "unsteadiest", "rodeos", "gloaming's",
+    "ringers", "randomised", "commissars", "destroyer's", "router",
+    "disengaging", "it's", "Albert", "rampantly", "varmint", "Adkins",
+    "chevron", "insomniac", "bobsledded", "masochist's", "chronometers",
+    "compaction", "Mauro", "sidled", "Highlander's", "snail's", "syllabifies",
+    "application's", "symmetrical", "blacking", "accent's", "sentimentalists",
+    "sonatas", "profanities", "sloping", "Araby", "percolate", "repeated",
+    "youthfulness's", "Loyola", "deliriously", "matriarch's", "tailors",
+    "rerouting", "hairpin", "dispersal", "endowment", "disquieting", "swat",
+    "neckerchieves", "wrinkles", "amoebas", "Darcy", "orthodontics's",
+    "milder", "sneezing", "prescience's", "pads", "wrought", "perspicuity's",
+    "materialist", "pull", "laundryman's", "lazily", "protractor's", "Vic",
+    "photocopier", "guardrooms", "cablecasting", "confirms", "excretions",
+    "combatant", "counterfeiters", "periwig", "genteelest", "router's",
+    "springy", "procreated", "syphon", "parent's", "bigwigs", "rebelled",
+    "milkmaids", "McGee's", "seaworthier", "Bellatrix's", "tenement",
+    "embryologists", "Vaselining", "burrow's", "tonnage's", "Petty's",
+    "chancels", "scouring", "mouser", "recompensed", "guarding", "editor",
+    "raster", "bourgeoisie's", "interpolating", "skinflint's", "transport",
+    "bullfinch", "needlessly", "withholds", "counterclockwise", "panicking",
+    "Ahriman", "flambeing", "contrary", "heartstrings", "whittled", "crib's",
+    "highlighter", "extroverted", "Martinique's", "racquets", "Maldivian",
+    "physiognomy", "Hammarskjold", "massage", "shingling", "neighbourhood",
+    "boobed", "vulture", "intercontinental", "cobblers", "peddlers",
+    "forthrightly", "germicide", "raindrop's", "fir's", "decaffeinates",
+    "wobblier", "abnegated", "cruiser's", "satiety", "trilled", "impending",
+    "gulf", "mountebank", "beltway", "reappointment", "cinematographer",
+    "pylon", "penthouses", "morally", "installs", "Walsh's", "drawstring",
+    "circus's", "Khayyam's", "Myrtle's", "ventrals", "category's",
+    "opportunistic", "grovelling", "warier", "upchuck", "hairdresser's",
+    "Montanans", "jobber", "dazzle", "encirclement's", "muffin's", "coronets",
+    "focus's", "footfall's", "subjunctives", "late", "pedagogued",
+    "dignitaries", "content", "blockbusters", "reminiscent", "mayor",
+    "specifier", "extinction", "nutshell's", "catbird's", "bundle",
+    "gracefulness", "exceed", "estranges", "chancy", "bankrupted", "Avery",
+    "Barnett", "succulence", "stacking", "ensnare", "truck", "embargo",
+    "persecutes", "translation's", "muskrat's", "illumines", "undercoat's",
+    "fleecier", "brick", "qualities", "imprecision", "reprisals", "discounts",
+    "harmonics", "Mann's", "terrorism", "interminable", "Santiago's",
+    "deepness", "tramples", "golder", "voyeurism's", "tent", "particle's",
+    "minuend", "waxwings", "knobby", "trustee", "funnily", "hotheadedness's",
+    "Kristin", "what", "bite", "murmur's", "pustule's", "weeknights",
+    "rocked", "athlete", "ventilates", "impresses", "daguerreotyping",
+    "Gross", "gambols", "villa", "maraud", "disapproval", "apostrophe's",
+    "sheaf", "noisemaker's", "autonomy's", "massing", "daemon's", "Thackeray",
+    "fermenting", "whammy", "philosophise", "empathy", "calamities",
+    "sunbathe", "Qom", "yahoo's", "coxcomb's", "move", "school's",
+    "rainmakers", "shipwreck", "potbelly's", "courageously", "current",
+    "Aleut", "treaties", "U", "always", "Bosch", "impregnating", "bud's",
+    "carat", "centrists", "acquaintance's", "convoy's", "chichis",
+    "restraint's", "Cosby", "factotums", "handshaking", "paragon's",
+    "mileages", "Tammie", "cartoonists", "lemmas", "lowliness's", "onion's",
+    "E's", "Bible", "Cranmer", "fob's", "minks", "overstocking", "Willamette",
+    "needle's", "scuppers", "Carborundum", "upwardly", "tallies", "aptitude",
+    "synod", "nasturtium's", "Pensacola", "snappish", "merino", "sups",
+    "fingerboard's", "prodigy's", "narcissism's", "substantial", "lug",
+    "establishing", "Vergil's", "patrimonies", "shorted", "forestation",
+    "undeniable", "Katmandu", "lamination", "trollop's", "odd", "stanza",
+    "paraplegic", "melanin", "Rico", "foreman", "stereotypes", "affinity's",
+    "cleansing", "sautéing", "epochs", "crooners", "manicured", "undisclosed",
+    "propel", "usage", "Alioth's", "Aurelia's", "peruse", "Vassar's",
+    "Demosthenes's", "Brazos", "supermarket", "scribbles", "Jekyll's",
+    "discomfort's", "mastiffs", "ballasting", "Figueroa", "turnstiles",
+    "convince", "Shelton's", "Gustavo", "shunting", "Fujitsu's", "fining's",
+    "hippos", "dam's", "expressionists", "peewee", "troop's"
+  ]
+  WORDS_SIZE = WORDS.size
+
+  def random_word
+    return WORDS[rand(WORDS_SIZE)]
+  end
+
+  def random_sentence(max_len)
+    sentence = ""
+    (1 + rand(max_len)).times { sentence << " " << random_word }
+    return sentence
+  end
+
+  def random_doc(max_fields = 10, max_elements = 10, max_len = 100)
+    doc = {}
+    (1 + rand(max_fields)).times do
+      field = random_word.intern
+      elem_count = rand(max_elements) + 1
+      if (elem_count == 1)
+        doc[field] = random_sentence(max_len)
+      else
+        doc[field] = []
+        elem_count.times { doc[field] << random_sentence(max_len)}
+      end
+    end
+    return doc
+  end
 end
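The rewritten tests above exercise the three shapes IndexWriter#<< now accepts, and swap 0.9.x's min_merge_docs tuning knob for max_buffered_docs. A condensed sketch of just those calls (the default field used for non-Hash documents is not named in the diff, so treat that detail as unspecified):

    require 'ferret'

    iw = Ferret::Index::IndexWriter.new(:dir => Ferret::Store::RAMDirectory.new,
                                        :create => true)

    # The three document shapes accepted by #<< in the rewritten test:
    iw << {:title => "first doc", :content => ["contents of", "first doc"]}  # Hash of fields
    iw << ["contents of", "second doc"]                                      # Array of values
    iw << "contents of third doc"                                            # bare String

    # Buffering knobs from test_add_documents_fuzzy; max_buffered_docs
    # replaces 0.9.x's min_merge_docs.
    iw.merge_factor = 3
    iw.max_buffered_docs = 3
    iw.close()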
data/test/unit/index/th_doc.rb:

@@ -1,8 +1,4 @@
-require File.dirname(__FILE__) + "/../../test_helper"
-
-
 module IndexTestHelper
-  include Ferret::Document
   include Ferret::Index
   include Ferret::Analysis
   include Ferret::Search
@@ -16,23 +12,34 @@ module IndexTestHelper
   BINARY_DATA = IndexTestHelper.make_binary(256)
   COMPRESSED_BINARY_DATA = IndexTestHelper.make_binary(56)
 
-  def IndexTestHelper.prepare_document
-    doc = Document.new()
-
-    doc << Field.new("text_field1", "field one text", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO)
-    doc << Field.new("text_field2", "field field field two text", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    doc << Field.new("key_field", "keyword", Field::Store::YES, Field::Index::UNTOKENIZED)
-    doc << Field.new("unindexed_field", "unindexed field text", Field::Store::YES, Field::Index::NO)
-    doc << Field.new("unstored_field1", "unstored field text one", Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::NO)
-    doc << Field.new("unstored_field2", "unstored field text two", Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::YES)
-    doc << Field.new("compressed_field", "compressed text", Field::Store::COMPRESS, Field::Index::TOKENIZED, Field::TermVector::YES)
-    doc << Field.new_binary_field("binary_field", BINARY_DATA, Field::Store::YES)
-    doc << Field.new_binary_field("compressed_binary_field", COMPRESSED_BINARY_DATA, Field::Store::COMPRESS)
-    return doc
+  def IndexTestHelper.prepare_document(dir)
+    fis = FieldInfos.new
+    fis.add_field(:text_field1, :term_vector => :no)
+    fis.add_field(:text_field2)
+    fis.add_field(:key_field, :index => :untokenized)
+    fis.add_field(:unindexed_field, :index => :no)
+    fis.add_field(:unstored_field1, :store => :no, :term_vector => :no)
+    fis.add_field(:unstored_field2, :store => :no, :term_vector => :yes)
+    fis.add_field(:compressed_field, :store => :compressed, :term_vector => :yes)
+    fis.add_field(:binary_field, :index => :no, :term_vector => :no)
+    fis.add_field(:compressed_binary_field, :store => :compressed,
+                  :index => :no, :term_vector => :no)
+    doc = {
+      :text_field1 => "field one text",
+      :text_field2 => "field field field two text",
+      :key_field => "keyword",
+      :unindexed_field => "unindexed field text",
+      :unstored_field1 => "unstored field text one",
+      :unstored_field2 => "unstored field text two",
+      :compressed_field => "compressed text",
+      :binary_field => BINARY_DATA,
+      :compressed_binary_field => COMPRESSED_BINARY_DATA
+    }
+    return doc, fis
   end
 
   def IndexTestHelper.prepare_documents
-    data = [
+    [
       ["apple", "green"],
       ["apple", "red"],
       ["orange", "orange"],
@@ -41,212 +48,268 @@ module IndexTestHelper
41
48
  ["mandarin", "orange"],
42
49
  ["peach", "orange"],
43
50
  ["apricot", "orange"]
44
- ]
45
-
46
- docs = []
47
-
48
- data.each do |food|
49
- doc = Document.new()
50
- doc << Field.new("name", food[0], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
51
- doc << Field.new("colour", food[1], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
52
- docs << doc
53
- end
54
- return docs
55
- end
56
-
57
- def IndexTestHelper.write_document(dir, doc, segment="test", analyzer = WhiteSpaceAnalyzer.new(), similarity = Similarity.default())
58
- writer = DocumentWriter.new(dir, analyzer, similarity, 50)
59
- writer.add_document(segment, doc)
51
+ ].map { |food| {"name" => food[0], "colour" => food[1]} }
60
52
  end
61
53
 
62
54
  def IndexTestHelper.prepare_book_list
63
55
  books = [
64
- {"author" => "P.H. Newby", "title" => "Something To Answer For", "year" => "1969"},
65
- {"author" => "Bernice Rubens", "title" => "The Elected Member", "year" => "1970"},
66
- {"author" => "V. S. Naipaul", "title" => "In a Free State", "year" => "1971"},
67
- {"author" => "John Berger", "title" => "G", "year" => "1972"},
68
- {"author" => "J. G. Farrell", "title" => "The Siege of Krishnapur", "year" => "1973"},
69
- {"author" => "Stanley Middleton", "title" => "Holiday", "year" => "1974"},
70
- {"author" => "Nadine Gordimer", "title" => "The Conservationist", "year" => "1974"},
71
- {"author" => "Ruth Prawer Jhabvala", "title" => "Heat and Dust", "year" => "1975"},
72
- {"author" => "David Storey", "title" => "Saville", "year" => "1976"},
73
- {"author" => "Paul Scott", "title" => "Staying On", "year" => "1977"},
74
- {"author" => "Iris Murdoch", "title" => "The Sea", "year" => "1978"},
75
- {"author" => "Penelope Fitzgerald", "title" => "Offshore", "year" => "1979"},
76
- {"author" => "William Golding", "title" => "Rites of Passage", "year" => "1980"},
77
- {"author" => "Salman Rushdie", "title" => "Midnight's Children", "year" => "1981"},
78
- {"author" => "Thomas Keneally", "title" => "Schindler's Ark", "year" => "1982"},
79
- {"author" => "J. M. Coetzee", "title" => "Life and Times of Michael K", "year" => "1983"},
80
- {"author" => "Anita Brookner", "title" => "Hotel du Lac", "year" => "1984"},
81
- {"author" => "Keri Hulme", "title" => "The Bone People", "year" => "1985"},
82
- {"author" => "Kingsley Amis", "title" => "The Old Devils", "year" => "1986"},
83
- {"author" => "Penelope Lively", "title" => "Moon Tiger", "year" => "1987"},
84
- {"author" => "Peter Carey", "title" => "Oscar and Lucinda", "year" => "1988"},
85
- {"author" => "Kazuo Ishiguro", "title" => "The Remains of the Day", "year" => "1989"},
86
- {"author" => "A. S. Byatt", "title" => "Possession", "year" => "1990"},
87
- {"author" => "Ben Okri", "title" => "The Famished Road", "year" => "1991"},
88
- {"author" => "Michael Ondaatje", "title" => "The English Patient", "year" => "1992"},
89
- {"author" => "Barry Unsworth", "title" => "Sacred Hunger", "year" => "1992"},
90
- {"author" => "Roddy Doyle", "title" => "Paddy Clarke Ha Ha Ha", "year" => "1993"},
91
- {"author" => "James Kelman", "title" => "How Late It Was, How Late", "year" => "1994"},
92
- {"author" => "Pat Barker", "title" => "The Ghost Road", "year" => "1995"},
93
- {"author" => "Graham Swift", "title" => "Last Orders", "year" => "1996"},
94
- {"author" => "Arundati Roy", "title" => "The God of Small Things", "year" => "1997"},
95
- {"author" => "Ian McEwan", "title" => "Amsterdam", "year" => "1998"},
96
- {"author" => "J. M. Coetzee", "title" => "Disgrace", "year" => "1999"},
97
- {"author" => "Margaret Atwood", "title" => "The Blind Assassin", "year" => "2000"},
98
- {"author" => "Peter Carey", "title" => "True History of the Kelly Gang", "year" => "2001"},
99
- {"author" => "Yann Martel", "title" => "The Life of Pi", "year" => "2002"},
100
- {"author" => "DBC Pierre", "title" => "Vernon God Little", "year" => "2003"}
56
+ {"author" => "P.H. Newby",
57
+ "title" => "Something To Answer For",
58
+ "year" => "1969"},
59
+ {"author" => "Bernice Rubens",
60
+ "title" => "The Elected Member",
61
+ "year" => "1970"},
62
+ {"author" => "V. S. Naipaul",
63
+ "title" => "In a Free State",
64
+ "year" => "1971"},
65
+ {"author" => "John Berger",
66
+ "title" => "G",
67
+ "year" => "1972"},
68
+ {"author" => "J. G. Farrell",
69
+ "title" => "The Siege of Krishnapur",
70
+ "year" => "1973"},
71
+ {"author" => "Stanley Middleton",
72
+ "title" => "Holiday",
73
+ "year" => "1974"},
74
+ {"author" => "Nadine Gordimer",
75
+ "title" => "The Conservationist",
76
+ "year" => "1974"},
77
+ {"author" => "Ruth Prawer Jhabvala",
78
+ "title" => "Heat and Dust",
79
+ "year" => "1975"},
80
+ {"author" => "David Storey",
81
+ "title" => "Saville",
82
+ "year" => "1976"},
83
+ {"author" => "Paul Scott",
84
+ "title" => "Staying On",
85
+ "year" => "1977"},
86
+ {"author" => "Iris Murdoch",
87
+ "title" => "The Sea",
88
+ "year" => "1978"},
89
+ {"author" => "Penelope Fitzgerald",
90
+ "title" => "Offshore",
91
+ "year" => "1979"},
92
+ {"author" => "William Golding",
93
+ "title" => "Rites of Passage",
94
+ "year" => "1980"},
95
+ {"author" => "Salman Rushdie",
96
+ "title" => "Midnight's Children",
97
+ "year" => "1981"},
98
+ {"author" => "Thomas Keneally",
99
+ "title" => "Schindler's Ark",
100
+ "year" => "1982"},
101
+ {"author" => "J. M. Coetzee",
102
+ "title" => "Life and Times of Michael K",
103
+ "year" => "1983"},
104
+ {"author" => "Anita Brookner",
105
+ "title" => "Hotel du Lac",
106
+ "year" => "1984"},
107
+ {"author" => "Keri Hulme",
108
+ "title" => "The Bone People",
109
+ "year" => "1985"},
110
+ {"author" => "Kingsley Amis",
111
+ "title" => "The Old Devils",
112
+ "year" => "1986"},
113
+ {"author" => "Penelope Lively",
114
+ "title" => "Moon Tiger",
115
+ "year" => "1987"},
116
+ {"author" => "Peter Carey",
117
+ "title" => "Oscar and Lucinda",
118
+ "year" => "1988"},
119
+ {"author" => "Kazuo Ishiguro",
120
+ "title" => "The Remains of the Day",
121
+ "year" => "1989"},
122
+ {"author" => "A. S. Byatt",
123
+ "title" => "Possession",
124
+ "year" => "1990"},
125
+ {"author" => "Ben Okri",
126
+ "title" => "The Famished Road",
127
+ "year" => "1991"},
128
+ {"author" => "Michael Ondaatje",
129
+ "title" => "The English Patient",
130
+ "year" => "1992"},
131
+ {"author" => "Barry Unsworth",
132
+ "title" => "Sacred Hunger",
133
+ "year" => "1992"},
134
+ {"author" => "Roddy Doyle",
135
+ "title" => "Paddy Clarke Ha Ha Ha",
136
+ "year" => "1993"},
137
+ {"author" => "James Kelman",
138
+ "title" => "How Late It Was, How Late",
139
+ "year" => "1994"},
140
+ {"author" => "Pat Barker",
141
+ "title" => "The Ghost Road",
142
+ "year" => "1995"},
143
+ {"author" => "Graham Swift",
144
+ "title" => "Last Orders",
145
+ "year" => "1996"},
146
+ {"author" => "Arundati Roy",
147
+ "title" => "The God of Small Things",
148
+ "year" => "1997"},
149
+ {"author" => "Ian McEwan",
150
+ "title" => "Amsterdam",
151
+ "year" => "1998"},
152
+ {"author" => "J. M. Coetzee",
153
+ "title" => "Disgrace",
154
+ "year" => "1999"},
155
+ {"author" => "Margaret Atwood",
156
+ "title" => "The Blind Assassin",
157
+ "year" => "2000"},
158
+ {"author" => "Peter Carey",
159
+ "title" => "True History of the Kelly Gang",
160
+ "year" => "2001"},
161
+ {"author" => "Yann Martel",
162
+ "title" => "The Life of Pi",
163
+ "year" => "2002"},
164
+ {"author" => "DBC Pierre",
165
+ "title" => "Vernon God Little",
166
+ "year" => "2003"}
101
167
  ]
102
- docs = []
103
-
104
- books.each do |book|
105
- doc = Document.new()
106
- doc << Field.new("author", book["author"], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
107
- doc << Field.new("title", book["title"], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
108
- doc << Field.new("year", book["year"], Field::Store::YES, Field::Index::NO, Field::TermVector::NO)
109
- docs << doc
110
- end
111
- return docs
112
168
  end
113
169
 
114
- IR_TEST_DOC_CNT = 64
115
-
116
- def IndexTestHelper.prepare_ir_test_docs()
117
- body = "body"
118
- title = "title"
119
- author = "author"
120
- text = "text"
121
- year = "year"
122
- changing_field = "changing_field"
170
+ def self.prepare_ir_test_fis
171
+ fis = FieldInfos.new
172
+ fis.add_field(:body)
173
+ fis.add_field(:changing_field, :term_vector => :no)
174
+ fis.add_field(:title, :index => :untokenized, :term_vector => :with_offsets)
175
+ fis.add_field(:author, :term_vector => :with_positions)
176
+ fis.add_field(:year, :index => :no, :term_vector => :no)
177
+ fis.add_field(:text, :store => :no, :term_vector => :no)
178
+ end
123
179
 
-    docs = Array.new(IR_TEST_DOC_CNT)
-    docs[0] = Document.new()
-    docs[0] << Field.new(body, "Where is Wally", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[0] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO)
-    docs[1] = Document.new()
-    docs[1] << Field.new(body, "Some Random Sentence read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[2] = Document.new()
-    docs[2] << Field.new(body, "Some read Random Sentence read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[3] = Document.new()
-    docs[3] << Field.new(title, "War And Peace", Field::Store::YES, Field::Index::UNTOKENIZED, Field::TermVector::WITH_OFFSETS)
-    docs[3] << Field.new(body, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[3] << Field.new(author, "Leo Tolstoy", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS)
-    docs[3] << Field.new(year, "1865", Field::Store::YES, Field::Index::NO, Field::TermVector::NO)
-    docs[3] << Field.new(text, "more text which is not stored", Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::NO)
-    docs[4] = Document.new()
-    docs[4] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[5] = Document.new()
-    docs[5] << Field.new(body, "Here's Wally", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[6] = Document.new()
-    docs[6] << Field.new(body, "Some Random Sentence read read read read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[7] = Document.new()
-    docs[7] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[8] = Document.new()
-    docs[8] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[9] = Document.new()
-    docs[9] << Field.new(body, "read Some Random Sentence read this will be used after unfinished next position read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[10] = Document.new()
-    docs[10] << Field.new(body, "Some read Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[10] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::YES)
-    docs[11] = Document.new()
-    docs[11] << Field.new(body, "And here too. Well, maybe Not", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[12] = Document.new()
-    docs[12] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[13] = Document.new()
-    docs[13] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[14] = Document.new()
-    docs[14] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[15] = Document.new()
-    docs[15] << Field.new(body, "Some read Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[16] = Document.new()
-    docs[16] << Field.new(body, "Some Random read read Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[17] = Document.new()
-    docs[17] << Field.new(body, "Some Random read Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[17] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS)
-    docs[18] = Document.new()
-    docs[18] << Field.new(body, "Wally Wally Wally", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[19] = Document.new()
-    docs[19] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[19] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_OFFSETS)
-    docs[20] = Document.new()
-    docs[20] << Field.new(body, "Wally is where Wally usually likes to go. Wally Mart! Wally likes shopping there for Where's Wally books. Wally likes to read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[20] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[21] = Document.new()
-    docs[21] << Field.new(body, "Some Random Sentence read read read and more read read read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[21] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO)
+  INDEX_TEST_DOC_COUNT = 64
+  def self.prepare_ir_test_docs
+    docs = []
+    docs[0] = {
+      :body => "Where is Wally",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3",
+    }
+    docs[1] = {
+      :body => "Some Random Sentence read"
+    }
+    docs[2] = {
+      :body => "Some read Random Sentence read"
+    }
+    docs[3] = {
+      :title => "War And Peace",
+      :body => "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3",
+      :author => "Leo Tolstoy",
+      :year => "1865",
+      :text => "more text which is not stored"
+    }
+    docs[4] = {
+      :body => "Some Random Sentence"
+    }
+    docs[5] = {
+      :body => "Here's Wally"
+    }
+    docs[6] = {
+      :body => "Some Random Sentence read read read read"
+    }
+    docs[7] = {
+      :body => "Some Random Sentence"
+    }
+    docs[8] = {
+      :body => "Some Random Sentence"
+    }
+    docs[9] = {
+      :body => "read Some Random Sentence read this will be used after " +
+               "unfinished next position read"
+    }
+    docs[10] = {
+      :body => "Some read Random Sentence",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3"
+    }
+    docs[11] = {
+      :body => "And here too. Well, maybe Not"
+    }
+    docs[12] = {
+      :body => "Some Random Sentence"
+    }
+    docs[13] = {
+      :body => "Some Random Sentence"
+    }
+    docs[14] = {
+      :body => "Some Random Sentence"
+    }
+    docs[15] = {
+      :body => "Some Random Sentence"
+    }
+    docs[16] = {
+      :body => "Some Random read read Sentence"
+    }
+    docs[17] = {
+      :body => "Some Random read Sentence",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3"
+    }
+    docs[18] = {
+      :body => "Wally Wally Wally"
+    }
+    docs[19] = {
+      :body => "Some Random Sentence",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3"
+    }
+    docs[20] = {
+      :body => "Wally is where Wally usually likes to go. Wally Mart! Wally " +
+               "likes shopping there for Where's Wally books. Wally likes " +
+               "to read",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3"
+    }
+    docs[21] = {
+      :body => "Some Random Sentence read read read and more read read read",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3"
+    }
 
     buf = ""
     21.times { buf << "skip " }
-    22.upto(IR_TEST_DOC_CNT) do |i|
+    22.upto(INDEX_TEST_DOC_COUNT-1) do |i|
       buf << "skip "
-      docs[i] = Document.new()
-      docs[i] << Field.new(text, buf.clone, Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
+      docs[i] = {:text => buf.clone}
     end
     return docs
   end
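Note the loop bound: 0.9.6 iterated 22.upto(IR_TEST_DOC_CNT), writing a 65th
document into a 64-slot array, while 22.upto(INDEX_TEST_DOC_COUNT-1) fills
exactly indices 22..63. A quick check of what the rewritten helper returns
(expected values reasoned from the code above, not from running 0.10.0):

    docs = IndexTestHelper.prepare_ir_test_docs
    docs.length         #=> 64
    docs[22][:text]     #=> "skip " * 22   (21 seed words plus one more)
    docs[63][:text]     #=> "skip " * 63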
 
-  def IndexTestHelper.prepare_search_docs
-    data = [
-      {"date" => "20050930", "field" => "word1",
-       "cat" => "cat1/"},
-      {"date" => "20051001", "field" => "word1 word2 the quick brown fox",
-       "cat" => "cat1/sub1"},
-      {"date" => "20051002", "field" => "word1 word3",
-       "cat" => "cat1/sub1/subsub1"},
-      {"date" => "20051003", "field" => "word1 word3",
-       "cat" => "cat1/sub2"},
-      {"date" => "20051004", "field" => "word1 word2",
-       "cat" => "cat1/sub2/subsub2"},
-      {"date" => "20051005", "field" => "word1",
-       "cat" => "cat2/sub1"},
-      {"date" => "20051006", "field" => "word1 word3",
-       "cat" => "cat2/sub1"},
-      {"date" => "20051007", "field" => "word1",
-       "cat" => "cat2/sub1"},
-      {"date" => "20051008", "field" => "word1 word2 word3 the fast brown fox",
-       "cat" => "cat2/sub1"},
-      {"date" => "20051009", "field" => "word1",
-       "cat" => "cat3/sub1"},
-      {"date" => "20051010", "field" => "word1",
-       "cat" => "cat3/sub1"},
-      {"date" => "20051011", "field" => "word1 word3 the quick red fox",
-       "cat" => "cat3/sub1"},
-      {"date" => "20051012", "field" => "word1",
-       "cat" => "cat3/sub1"},
-      {"date" => "20051013", "field" => "word1",
-       "cat" => "cat1/sub2"},
-      {"date" => "20051014", "field" => "word1 word3 the quick hairy fox",
-       "cat" => "cat1/sub1"},
-      {"date" => "20051015", "field" => "word1",
-       "cat" => "cat1/sub2/subsub1"},
-      {"date" => "20051016",
-       "field" => "word1 the quick fox is brown and hairy and a little red",
-       "cat" => "cat1/sub1/subsub2"},
-      {"date" => "20051017", "field" => "word1 the brown fox is quick and red",
-       "cat" => "cat1/"}
-    ]
-
-    docs = []
-    data.each_with_index do |fields, i|
-      doc = Document.new()
-      doc.boost = i+1
+  INDEX_TEST_DOCS = self.prepare_ir_test_docs()
+  INDEX_TEST_FIS = self.prepare_ir_test_fis()
 
-      fields.each_pair do |field, text|
-        doc << Field.new(field, text, Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO, false)
-      end
-      docs << doc
+  def self.prepare_search_docs
+    i = 1
+    [
+      ["20050930", "cat1/",             "word1"                          ],
+      ["20051001", "cat1/sub1",         "word1 word2 the quick brown fox"],
+      ["20051002", "cat1/sub1/subsub1", "word1 word3"                    ],
+      ["20051003", "cat1/sub2",         "word1 word3"                    ],
+      ["20051004", "cat1/sub2/subsub2", "word1 word2"                    ],
+      ["20051005", "cat2/sub1",         "word1"                          ],
+      ["20051006", "cat2/sub1",         "word1 word3"                    ],
+      ["20051007", "cat2/sub1",         "word1"                          ],
+      ["20051008", "cat2/sub1",         "word1 word2 word3 the fast brown fox"],
+      ["20051009", "cat3/sub1",         "word1"                          ],
+      ["20051010", "cat3/sub1",         "word1"                          ],
+      ["20051011", "cat3/sub1",         "word1 word3 the quick red fox"  ],
+      ["20051012", "cat3/sub1",         "word1"                          ],
+      ["20051013", "cat1/sub2",         "word1"                          ],
+      ["20051014", "cat1/sub1",         "word1 word3 the quick hairy fox"],
+      ["20051015", "cat1/sub2/subsub1", "word1"                          ],
+      ["20051016", "cat1/sub1/subsub2",
+       "word1 the quick fox is brown and hairy and a little red"],
+      ["20051017", "cat1/",
+       "word1 the brown fox is quick and red"]
+    ].map do |date, category, field|
+      doc = Ferret::Document.new(i)
+      i += 1
+      doc[:date] = date
+      doc[:category] = category
+      doc[:field] = field
+      doc
     end
-    return docs
-  end
-
-  def IndexTestHelper.explain (query, searcher, field)
-    top_docs = searcher.search(query)
-    top_docs.score_docs.each { |sd|
-      puts "\nDoc #{sd.doc}: #{searcher.doc(sd.doc)[field]}\n#{searcher.explain(query, sd.doc).to_s}\n"
-    }
   end
 
+  SEARCH_TEST_DOCS = self.prepare_search_docs()
 end
-
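The per-document boost that 0.9.6 set after construction (doc.boost = i+1) is
now passed straight to the constructor, and the boosts themselves are
unchanged: the documents still receive boosts 1 through 18. Judging by this
diff, Ferret::Document in 0.10.0 behaves like a Hash that carries a boost; a
small sketch on that assumption (not a verified 0.10.0 reference):

    doc = Ferret::Document.new(2.0)   # boost of 2.0
    doc[:date]  = "20051018"
    doc[:field] = "word1 word2"
    doc.boost                         #=> 2.0
    doc[:field]                       #=> "word1 word2"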