ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -7,6 +7,8 @@ class IndexWriterTest < Test::Unit::TestCase
 
   def setup()
     @dir = Ferret::Store::RAMDirectory.new
+    fis = FieldInfos.new()
+    fis.create_index(@dir)
  end
 
   def tear_down()
@@ -18,7 +20,7 @@ class IndexWriterTest < Test::Unit::TestCase
     clock = @dir.make_lock(IndexWriter::COMMIT_LOCK_NAME)
     assert(! wlock.locked?)
     assert(! clock.locked?)
-    iw = IndexWriter.new(@dir, :create => true)
+    iw = IndexWriter.new(:dir => @dir, :create => true)
     assert(@dir.exists?("segments"))
     assert(wlock.locked?)
     iw.close()
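
The two hunks above show the headline API change in 0.10.0: IndexWriter now takes an options hash (:dir, :analyzer, :create, ...) instead of a positional directory argument, and an index's field settings are laid down up front through a FieldInfos object. A minimal sketch of the new style, using the same RAMDirectory store as these tests (all names are taken from the hunks themselves, not from other documentation):

    require 'ferret'
    include Ferret::Index          # assumed namespace, as used by these tests

    dir = Ferret::Store::RAMDirectory.new
    fis = FieldInfos.new()
    fis.create_index(dir)          # write an empty index with default field settings
    # the 0.9.x equivalent was IndexWriter.new(dir, :create => true)
    iw = IndexWriter.new(:dir => dir, :create => true)
    iw.close()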
@@ -28,26 +30,408 @@ class IndexWriterTest < Test::Unit::TestCase
   end
 
   def test_add_document
-    iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new(), :create => true)
-    doc = IndexTestHelper.prepare_document()
-    iw.add_document(doc)
+    iw = IndexWriter.new(:dir => @dir,
+                         :analyzer => StandardAnalyzer.new(),
+                         :create => true)
+    iw << {:title => "first doc", :content => ["contents of", "first doc"]}
     assert_equal(1, iw.doc_count)
+    iw << ["contents of", "second doc"]
+    assert_equal(2, iw.doc_count)
+    iw << "contents of third doc"
+    assert_equal(3, iw.doc_count)
     iw.close()
   end
 
-  def test_add_documents
-    iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new(), :create => true)
-    # uncomment the following line to see logging
-    #iw.info_stream = $stdout
+  def test_add_documents_fuzzy
+    iw = IndexWriter.new(:dir => @dir,
+                         :analyzer => StandardAnalyzer.new())
     iw.merge_factor = 3
-    iw.min_merge_docs = 3
-    docs = IndexTestHelper.prepare_book_list()
-    docs.each_with_index do |doc, i|
-      #puts "Index doc " + i.to_s
+    iw.max_buffered_docs = 3
+
+    # add 100 documents
+    100.times do
+      doc = random_doc()
       iw.add_document(doc)
     end
-    assert_equal(37, iw.doc_count)
+    assert_equal(100, iw.doc_count)
     iw.close()
   end
 
+  private
+
+  WORDS = [
+    "desirous", "hollowness's", "camp's", "Senegal", "broadcaster's",
+    "pecking", "Provence", "paternalism", "premonition", "Dumbo's",
+    "Darlene's", "Elbert's", "substrate", "Camille", "Menkalinan", "Cooper",
+    "decamps", "abatement's", "bindings", "scrubby", "subset", "ancestor's",
+    "pelagic", "abscissa", "loofah's", "gleans", "boudoir", "disappointingly",
+    "guardianship's", "settlers", "Mylar", "timetable's", "parabolic",
+    "madams", "bootlegger's", "monotonically", "gage", "Karyn's", "deposed",
+    "boozy", "swordfish's", "Chevron", "Victrola", "Tameka", "impels",
+    "carrels", "salami's", "celibate", "resistance's", "duration",
+    "abscissae", "Kilroy's", "corrosive", "flight's", "flapper", "scare",
+    "peppiest", "Pygmies", "Menzies", "wrist's", "enumerable", "housecoats",
+    "Khwarizmi's", "stampeding", "hungering", "steeping", "Yemenis",
+    "entangles", "solver", "mishapping", "Rand's", "ninety", "Boris",
+    "impedimenta", "predators", "ridge", "wretchedness's", "crapping", "Head",
+    "Edwards", "Claude's", "geodesics", "verities", "botch", "Short's",
+    "vellum's", "coruscates", "hydrogenates", "Haas's", "deceitfulness",
+    "cohort's", "Cepheus", "totes", "Cortez's", "napalm", "fruitcake",
+    "coordinated", "Coulomb", "desperation", "behoves", "contractor's",
+    "vacationed", "Wanamaker's", "leotard", "filtrated", "cringes", "Lugosi",
+    "sheath's", "orb", "jawed", "Isidro", "geophysics", "persons", "Asians",
+    "booze's", "eight's", "backslappers", "hankered", "dos", "helpings",
+    "tough", "interlarding", "gouger", "inflect", "Juneau's", "hay's",
+    "sardining", "spays", "Brandi", "depressant", "space", "assess",
+    "reappearance's", "Eli's", "Cote", "Enoch", "chants", "ruffianing",
+    "moralised", "unsuccessfully", "or", "Maryland's", "mildest", "unsafer",
+    "dutiful", "Pribilof", "teas", "vagued", "microbiologists", "hedgerow",
+    "speller's", "conservators", "catharsis", "drawbacks", "whooshed",
+    "unlawful", "revolve", "craftsmanship", "destabilise", "Margarito",
+    "Asgard's", "spawn's", "Annabel's", "canonicals", "buttermilk",
+    "exaltation's", "pothole", "reprints", "approximately", "homage",
+    "Wassermann's", "Atlantic's", "exacerbated", "Huerta", "keypunching",
+    "engagements", "dilate", "ponchos", "Helvetius", "Krakatoa", "basket's",
+    "stepmother", "schlock's", "drippings", "cardiology's", "northwesterly",
+    "cruddier", "poesies", "rustproof", "climb", "miscalled", "Belgians",
+    "Iago", "brownout", "nurseries", "hooliganism's", "concourse's",
+    "advocate", "sunrise's", "hyper", "octopus's", "erecting",
+    "counterattacking", "redesign", "studies", "nitrating", "milestone",
+    "bawls", "Nereid", "inferring", "Ontario's", "annexed", "treasury",
+    "cosmogony's", "scandalised", "shindig's", "detention's",
+    "Lollobrigida's", "eradicating", "magpie", "supertankers", "Adventist's",
+    "dozes", "Artaxerxes", "accumulate", "dankest", "telephony", "flows",
+    "Srivijaya's", "fourteen's", "antonym", "rancid", "briefing's",
+    "theologian", "Jacuzzi", "gracing", "chameleon's", "Brittney's",
+    "Pullmans", "Robitussin's", "jitterier", "mayonnaise's", "fort",
+    "closeouts", "amatory", "Drew's", "cockfight", "pyre", "Laura's",
+    "Bradley's", "obstructionists", "interventions", "tenderness's",
+    "loadstones", "castigation's", "undercut", "volubly", "meditated",
+    "Ypsilanti", "Jannie's", "tams", "drummer's", "inaugurations", "mawing",
+    "Anglophile", "Sherpa", "footholds", "Gonzalo", "removers",
+    "customisation", "procurement's", "allured", "grimaced", "captaining",
+    "liberates", "grandeur's", "Windsor", "screwdrivers", "Flynn's",
+    "extortionists", "carnivorous", "thinned", "panhandlers", "trust's",
+    "bemoaned", "untwisted", "cantors", "rectifies", "speculation",
+    "niacin's", "soppy", "condom", "halberd", "Leadbelly", "vocation's",
+    "tanners", "chanticleer", "secretariats", "Ecuador's", "suppurated",
+    "users", "slag's", "atrocity's", "pillar", "sleeveless", "bulldozers",
+    "turners", "hemline", "astounded", "rosaries", "Mallarmé", "crucifies",
+    "Maidenform", "contribution", "evolve", "chemicals", "uteri",
+    "expostulation", "roamers", "daiquiris", "arraignment", "ribs", "King's",
+    "Persepolis", "arsenic's", "blindfolds", "bloodsucker's", "restocks",
+    "falconry", "Olympia's", "Colosseum's", "vigils", "Louie's",
+    "unwillingly", "sealed", "potatoes", "Argentine", "audit's", "outworn",
+    "boggles", "likely", "alleging", "Tinkerbell", "redistribution's",
+    "Normandy", "Cortes", "porter's", "buntings", "cornucopias", "rosewoods",
+    "shelf's", "airdrops", "summits", "Rosalyn", "redecorating", "twirlers",
+    "monsters", "directed", "semiautomatics", "Foch", "Hobart", "mutilates",
+    "Wilma's", "ornamenting", "Clifford's", "pyromania", "Strasbourg",
+    "bleeders", "additions", "super", "effortlessly", "piecing", "vacations",
+    "gybes", "warranted", "Ting", "her", "histrionic", "marshaled", "spore's",
+    "villainy's", "brat", "confusion", "amphitheatre's", "adjourns",
+    "guzzled", "Visayans", "rogue's", "morsels", "candlestick", "flaks",
+    "Waterbury", "pulp's", "endorser's", "postdoc", "coffining", "swallowing",
+    "Wrangell", "Marcie's", "Marley", "untapped", "fear's", "Kant",
+    "pursuit's", "normally", "jackals", "orals", "Paramaribo's", "Marilyn's",
+    "Diem's", "narrower", "medicinally", "chickweed's", "pretentiousness",
+    "Lardner", "baritone's", "purrs", "Pam's", "pestles", "Philip's",
+    "Titania", "eccentrics", "Albion's", "greed's", "raggediest",
+    "importations", "Truman", "incentives", "typified", "incurred",
+    "bandstands", "Minnie's", "pleasant", "Sandy's", "perplexities",
+    "crease's", "obliques", "backstop", "Nair's", "perusing", "Quixote's",
+    "sicknesses", "vapour's", "butte", "lariats", "disfavours", "McGuffey",
+    "paediatric", "filtered", "whiff's", "gunboats", "devolved",
+    "extravaganza's", "organism", "giggling", "citadel's", "counterbalances",
+    "executrixes", "Cathay", "marshmallow's", "iniquitous", "Katmai", "Siva",
+    "welled", "impertinence's", "plunger", "rice", "forgers", "Larousse",
+    "pollution's", "medium", "residue's", "rumbas", "Odis", "arrogant",
+    "Jasper's", "panged", "doubted", "vistaing", "decibel's", "modulus's",
+    "chickpea's", "mugger's", "potentates", "sequesters", "academy's",
+    "Turk's", "pharmacology's", "defogger", "clomp", "soulless", "elastic",
+    "la's", "shards", "unfortunate", "counterclaim's", "objections", "towel",
+    "converged", "z", "ionisation", "stirrups", "antiquarians", "constructor",
+    "virtuosity's", "Göteborg", "centigramme's", "translators", "dalliance's",
+    "us", "bullfight", "drawer's", "nonconformist", "handcrafts", "Magritte",
+    "tulle", "plant's", "routine", "colour's", "latency's", "repertoire's",
+    "photocopies", "catalyse", "ashrams", "lagging", "flapjack's",
+    "ayatollahs", "decentest", "pitted", "conformity", "jack", "batsman",
+    "electrifies", "Unitarians", "obtain", "medicates", "tumour's",
+    "nutritionally", "haystack", "bustles", "slut", "satirising", "birettas",
+    "starring", "Kubrick's", "flogs", "chequering", "Menkalinan's",
+    "Barbados's", "Bioko", "swinish", "hades", "perjured", "timing's",
+    "cocaine", "ejecting", "rationalises", "dilettante's", "umping",
+    "capsized", "frogmen", "matt", "prostituting", "bola's", "devolution's",
+    "poxing", "Maritza's", "snob's", "scoped", "Costco", "feral", "sirocco",
+    "rebating", "truculence", "junkier", "nabs", "elicit", "allegiance",
+    "care", "arteriosclerosis's", "nonproliferation's", "doxologies",
+    "disconsolate", "bodega", "designers", "Rembrandt", "apostasies",
+    "garrulousness", "Hertzsprung's", "hayseeds", "noncooperation's",
+    "resentment", "cuticles", "sandboxes", "gimmicks", "magnolia",
+    "invalidity's", "pulverised", "Tinkerbell's", "hypoglycemics",
+    "gunboat's", "workbench's", "fleetingly's", "sportsman's", "trots",
+    "decomposes", "discrepancies", "owls", "obscener", "organic", "stoutness",
+    "councillor's", "Philippine's", "Aline", "coarsening", "suffocated",
+    "infighting's", "peculiarity", "roof's", "premier", "sucked", "churl",
+    "remounts", "intends", "wiles", "unfold", "unperturbed", "wainscotings",
+    "restfuller", "ashtray's", "wader's", "decanters", "gild", "tandems",
+    "spooked", "galling", "annuity's", "opacity", "clamour's", "flaccid",
+    "caroming", "savvying", "mammalian's", "toadstool's", "doohickey", "jibs",
+    "conquests", "dishes", "effusively", "distinctions", "curly", "Peckinpah",
+    "whining", "quasar", "sponge", "infrequent", "Novembers", "cowling",
+    "poem's", "muzzles", "Sufi", "authoritarians", "prompts", "Gavin's",
+    "morphology's", "shenanigan", "narrated", "rapprochement", "Heine",
+    "propane's", "addition", "prefect's", "pining", "dwindles",
+    "compulsiveness's", "objectors", "trudging", "segregates", "language",
+    "enthralled", "explosiveness", "toeing", "drainers", "Merrimack's",
+    "smarten", "bigwig's", "embroiders", "Medicaids", "grammar's", "behest's",
+    "chiseled", "equalled", "factual", "Casablanca's", "dams",
+    "disillusioned", "turtleneck", "Baden", "provinces", "bushwhacked", "fey",
+    "Yangtze", "loan's", "decent", "strobe", "challenger's", "hometown",
+    "Neal", "Ernestine's", "magnetises", "minute", "patrol", "Starbucks",
+    "Bernstein", "signal", "interplanetary", "tweak", "archdeacon",
+    "untoward", "transducer", "azaleas", "levied", "worlds", "talks",
+    "Tancred", "hairsplitting's", "edibility's", "confab", "rosetted",
+    "Spanish", "Americanisation", "Charley", "realm's", "incongruities",
+    "chinstraps", "dollhouses", "binocular", "popgun", "physiotherapy's",
+    "knave's", "angelically", "heartbreaking", "clarions", "bespeaks",
+    "pivotal", "Zosma", "ungrammatical", "dilution", "tidily", "Dejesus's",
+    "taller", "pennyweight's", "freshman", "Jamestown", "chiefer", "amen",
+    "attiring", "appurtenance's", "opiates", "mottoes", "towellings", "ashen",
+    "font's", "spoors", "pupil", "groom's", "skimpy", "achieves",
+    "intolerance's", "ardour's", "exorcist", "bottoming", "snag's",
+    "Frenches", "hysteric's", "ladyfinger's", "differences", "seed",
+    "clubfoot's", "glades", "Elton's", "jargon", "Waldo", "grinning",
+    "coherence's", "winos", "turnround", "appended", "Ethelred's", "delete",
+    "steadfastness's", "miss", "thermoplastic", "depraves", "unctuous",
+    "reanimates", "transfusing", "protects", "Babbage's", "foists", "inn",
+    "etched", "sanctimoniously", "idling", "timepiece", "holistic",
+    "waterside", "ulna's", "swindled", "employables", "zebra", "nieces",
+    "pertained", "usages", "vamp's", "Larry's", "cooler's", "holographs",
+    "clewing", "stubborning", "peaked", "underfeeds", "marshmallows",
+    "agreeable", "beards", "Slovenia's", "nitroglycerin", "palls", "impurer",
+    "armours", "stomachaches", "notification's", "Dixieland's", "crozier's",
+    "neurotic", "kudos", "Tania's", "M", "soundtrack's", "territory's",
+    "sped", "house's", "divisibility", "ingress's", "pummelled", "Isabel",
+    "Dewitt", "seemly", "hutched", "calliope", "lengthwise", "flubs",
+    "Moldavia's", "Mercia", "McBride's", "Lenten", "pulverise", "football",
+    "oligarchy", "Max", "scribbler", "acclimatize", "brainwashes",
+    "apprenticed", "benevolences", "two", "Wodehouse", "crew's", "massacre",
+    "proportionals", "Jewishness's", "instep's", "emissary", "folder",
+    "nonentity's", "convinced", "caption", "kangarooed", "dogie",
+    "vagabonding", "auction's", "appraising", "antimony", "part's",
+    "longitude's", "inconsiderateness's", "pawning", "serer", "solos",
+    "histories", "mushy", "parturition", "munched", "oregano", "inanest",
+    "dryness", "kitchenware", "unexpected", "covens", "cheesecakes",
+    "stakeout's", "Pulaski's", "Yoknapatawpha's", "pinhead", "drifted",
+    "guzzler's", "funking", "sou'wester", "oesophagus's", "highbrow",
+    "contralto", "meningitis", "Mazzini", "raggedest", "vaginas", "misfiring",
+    "margaritas", "wedder", "pointed", "slicked", "garlanded", "comeuppances",
+    "vassals", "Sui", "Concord", "bozos", "Garry's", "Maribel's", "epileptic",
+    "Jehoshaphat's", "revolutionary's", "kneecaps", "songbird", "actively",
+    "Meredith", "toddler", "distrusting", "fuchsias", "perusal", "instills",
+    "deathbed", "sunspot's", "spatula's", "Muscovy", "humaniser", "Keats",
+    "regrets", "deflect", "theories", "nonpluses", "populating", "leniency's",
+    "penicillin's", "gaol's", "borough", "moose's", "dogmata",
+    "transcendentally", "supposition's", "nursed", "Gagarin's", "honest",
+    "Chandrasekhar's", "mudslinger's", "parable", "bonged", "Wyeth's",
+    "Ochoa's", "Grenoble", "steamy", "halter's", "rotisserie's", "pagoda's",
+    "wallaby's", "Yank", "pretzel", "rapist's", "estrange", "hectored",
+    "Puebla's", "conniver", "creditor's", "dole's", "Fotomat", "patents",
+    "heckling", "thickener", "etches", "yogi", "hemstitched", "obverses",
+    "Lipizzaner", "divert", "Strong's", "sagest", "Alabama", "He", "Carrie's",
+    "obligation's", "verity's", "outed", "Rhee", "bluffed", "codas",
+    "crèche's", "unpalatable", "dilettanti", "vestment", "purse's",
+    "inflammation's", "bookmarked", "doing's", "whinnying", "impersonators",
+    "Theiler", "scurried", "resistor", "southerners", "Anacreon",
+    "reconstruction's", "footage", "trespassing", "Kafka", "bottling",
+    "stays", "Gretzky", "overburdening", "princesses", "weathercock's",
+    "atolls", "cheerier", "packet", "surrenders", "teacup", "Sabik's",
+    "undecidable", "lollygagged", "pawl's", "anaesthesiology", "sublimely",
+    "contortionists", "motorcades", "Maureen", "lamasery", "yourselves",
+    "Creighton", "poliomyelitis's", "civil", "outmanoeuvre", "lauded",
+    "closeness", "Humboldt's", "pretzels", "ungrudging", "blackguard's",
+    "sickles", "typo", "narcotics", "linesman", "psychotics", "pictured",
+    "deviltry", "Yahtzee", "Lovelace's", "cerebra", "airiness's", "bewitch",
+    "how", "motherland's", "crate's", "Keenan's", "turnstile's",
+    "pedometer's", "carted", "slipping", "fallow", "Canadian", "ladybird's",
+    "thump", "shopper's", "enters", "scowls", "nematode", "focused",
+    "Riley's", "grainiest", "novas", "snuffled", "leftovers", "deify",
+    "Samoan", "pruning", "contenting", "Khachaturian's", "triads",
+    "genealogies", "psalmist", "shaming", "appropriated", "ignominies",
+    "Beadle's", "MHz", "peerages", "facile", "Seoul", "Janna's", "jig's",
+    "mousiness's", "funnier", "delimiter", "watermark", "sheik's", "Reasoner",
+    "ipecac's", "curdles", "wronged", "Segovia's", "solders", "Dunne's",
+    "contractor", "awards", "hostels", "pinkie's", "Herzl", "misplace",
+    "shuttle", "innovative", "vestries", "cosmoses", "trikes", "Casandra's",
+    "hokier", "carouser's", "summerhouses", "renascence", "decomposed",
+    "Balzac's", "outlast", "shod", "squalling", "smugging", "weighing",
+    "omega's", "selects", "fleetingly", "Finland", "petted", "disrespects",
+    "fetter", "confound", "brads", "Bosnia's", "preposition's", "guy's",
+    "different", "tracts", "paediatrics's", "polygon", "eyetooth's", "Aesop",
+    "pentagons", "professions", "homeowner", "looter's", "intimidated",
+    "lustre's", "loneliness", "catnapped", "counties", "pailful",
+    "Christendom's", "Barents", "penis", "Mumford's", "Nigel", "éclairs",
+    "splats", "diabolical", "popularly", "quart", "abjected", "Rasalgethi",
+    "camel's", "inimical", "overweening", "distention's", "Advil", "casement",
+    "seamier", "avaricious", "sierra's", "caparison's", "moldered", "Cortez",
+    "handmaid's", "disappointment", "billowed", "overpopulated", "outsets",
+    "ray", "smoother", "overkill", "somber", "tiller's", "zigzag", "adviser",
+    "absorption's", "sturdily", "hairy", "bloodmobile", "investiture's",
+    "creature", "ripeness's", "Jonathon", "arborvitae's", "skulduggery",
+    "bog", "skeleton's", "Kit's", "Panamas", "Ashlee's", "jazzy", "snit",
+    "divisive", "caribous", "permuting", "frankest", "annotated", "oak's",
+    "meg's", "Gill", "burrito", "dormancy's", "offings", "Nike",
+    "outnumbered", "skater's", "Portugal", "deficit", "Cannon's", "pockmark",
+    "sediment's", "mailbox", "innuendoed", "retire", "wolfhound's",
+    "nicotine's", "brigade's", "mettle's", "softhearted", "hooey's",
+    "abdication", "Orval", "Jaime", "ship", "hyphenations", "sectarians",
+    "Alabaman", "tagging", "ultras", "schizoids", "medicines", "undersized",
+    "Gray", "maternity's", "bandaging", "scooping", "coercion's", "serapes",
+    "celebrate", "Listerine's", "throve", "crypt's", "nearsighted",
+    "metallurgists", "Delicious", "cotton's", "yoked", "cogitates",
+    "underage", "cigarette's", "hallways", "Cointreau", "ma'am", "spacing's",
+    "foresight", "parkway's", "Edwardian", "mediator", "Turner", "Derrida's",
+    "motorist's", "hobo", "equivalences", "sophism", "peeping", "telescoped",
+    "overproduce", "ductility", "Leblanc", "refractory", "passé", "decodes",
+    "womanising", "flax's", "pond's", "infrequency", "talkativeness's",
+    "settlement's", "Prince", "bating", "multimillionaire", "Schultz",
+    "premiss", "quackery", "bathhouse", "Leno's", "Monday's", "Hung's",
+    "undaunted", "bewaring", "tension's", "Chile's", "Rostand's", "platoons",
+    "rodeo's", "Dionne", "Dyson's", "gingivitis's", "fewer",
+    "electromagnetism's", "scrubbier", "ensconced", "wretcheder", "mica's",
+    "expectorant", "snapper's", "chastised", "habitation", "spry", "bathing",
+    "stealth's", "champagnes", "baleful", "fencing's", "threaded", "codicils",
+    "disgraced", "redcaps", "addends", "Olivier", "clasped", "Gwendolyn",
+    "foment", "angularity's", "strenuously", "gorilla", "misbehaved",
+    "surplus's", "newsier", "positioned", "bloodmobiles", "circumstantials",
+    "person's", "varicose", "Calliope", "plethora", "Olmsted",
+    "reconciliation", "Brendan's", "beset", "totters", "sailors",
+    "parliamentarians", "Whitaker", "hilts", "pummelling", "academician's",
+    "ruse", "discreeter", "appetisingly", "perfections", "anus", "overrode",
+    "pedantry's", "possessed", "germs", "unscrews", "expired",
+    "semitrailer's", "Cupid's", "nonsmoker", "Marathon", "secs", "Hopkins",
+    "freeing", "libelled", "furious", "staccatos", "electroencephalogram's",
+    "malingerer's", "impulses", "briars", "Tran", "hilltops", "sulks",
+    "quailed", "fads", "retrenches", "spouted", "outtake", "puncture's",
+    "rats", "kibitzed", "berets", "omnivorous", "flange", "Mons", "glints",
+    "mansards", "thou", "cuing", "suspected", "Kaiser's", "savvier", "skits",
+    "interdict's", "Booker", "Rubinstein", "Tm's", "crossing's", "dewlap",
+    "guarantor's", "edification's", "joyfullest", "crossed", "chowdering",
+    "sillier", "reloading", "commodity's", "bodkins", "conduced", "coughs",
+    "nucleus's", "sixtieth", "proverbially", "comprehensive", "ineluctably",
+    "patrolmen", "resuscitating", "carpetbag's", "Darrin's", "Yeager",
+    "Bataan's", "spoonsful", "proceeds", "wrongdoer", "Karroo", "heart",
+    "poison", "typifying", "endowment's", "aquanauts", "deaconesses",
+    "homosexuality", "Maxine", "haunching", "centred", "Peking's",
+    "toothiest", "growers", "firebombs", "throbs", "Downy", "contribution's",
+    "sago's", "Cole", "Knoxville", "leftmost", "Nell's", "Baffin", "barrings",
+    "contagions", "disencumbers", "countdown", "quintuple", "perihelion",
+    "creationism's", "actioning", "admiralty", "Mt's", "durability's",
+    "sewer's", "replicas", "oxide", "ripened", "Pisces's", "Cinerama's",
+    "catheters", "oppressive", "roosting", "foggiest", "properly", "Kareem",
+    "Ollie", "minuted", "vehicles", "eel", "remunerates", "swashbuckler's",
+    "remunerative", "sanguining", "Belem's", "forlornly", "rudders",
+    "officialdom", "countertenors", "Upton", "whoop", "animations", "arouses",
+    "millionths", "videocassette", "fledgling", "shake", "exterminated",
+    "Cain's", "trendiest", "wariest", "torpedoes", "airmails", "Cameron's",
+    "discord's", "spitefulness's", "thudded", "menaced", "takeovers",
+    "solicited", "wallpapers", "economic", "cache", "rechargeable", "gongs",
+    "droning", "exemption", "Alaskans", "toothed", "snifter", "Stephens",
+    "prejudge", "doctor's", "bobolinks", "rotates", "valuation's", "narrator",
+    "weaning", "uncle", "shelter", "destitution's", "Edgardo's", "gauge",
+    "Nice", "Adolf's", "rheumatics", "inheritances", "undesirables",
+    "Eileen's", "flyweight's", "scope", "possessiveness", "tipsily",
+    "effulgence", "rematch", "Baltic", "unsteadiest", "rodeos", "gloaming's",
+    "ringers", "randomised", "commissars", "destroyer's", "router",
+    "disengaging", "it's", "Albert", "rampantly", "varmint", "Adkins",
+    "chevron", "insomniac", "bobsledded", "masochist's", "chronometers",
+    "compaction", "Mauro", "sidled", "Highlander's", "snail's", "syllabifies",
+    "application's", "symmetrical", "blacking", "accent's", "sentimentalists",
+    "sonatas", "profanities", "sloping", "Araby", "percolate", "repeated",
+    "youthfulness's", "Loyola", "deliriously", "matriarch's", "tailors",
+    "rerouting", "hairpin", "dispersal", "endowment", "disquieting", "swat",
+    "neckerchieves", "wrinkles", "amoebas", "Darcy", "orthodontics's",
+    "milder", "sneezing", "prescience's", "pads", "wrought", "perspicuity's",
+    "materialist", "pull", "laundryman's", "lazily", "protractor's", "Vic",
+    "photocopier", "guardrooms", "cablecasting", "confirms", "excretions",
+    "combatant", "counterfeiters", "periwig", "genteelest", "router's",
+    "springy", "procreated", "syphon", "parent's", "bigwigs", "rebelled",
+    "milkmaids", "McGee's", "seaworthier", "Bellatrix's", "tenement",
+    "embryologists", "Vaselining", "burrow's", "tonnage's", "Petty's",
+    "chancels", "scouring", "mouser", "recompensed", "guarding", "editor",
+    "raster", "bourgeoisie's", "interpolating", "skinflint's", "transport",
+    "bullfinch", "needlessly", "withholds", "counterclockwise", "panicking",
+    "Ahriman", "flambeing", "contrary", "heartstrings", "whittled", "crib's",
+    "highlighter", "extroverted", "Martinique's", "racquets", "Maldivian",
+    "physiognomy", "Hammarskjold", "massage", "shingling", "neighbourhood",
+    "boobed", "vulture", "intercontinental", "cobblers", "peddlers",
+    "forthrightly", "germicide", "raindrop's", "fir's", "decaffeinates",
+    "wobblier", "abnegated", "cruiser's", "satiety", "trilled", "impending",
+    "gulf", "mountebank", "beltway", "reappointment", "cinematographer",
+    "pylon", "penthouses", "morally", "installs", "Walsh's", "drawstring",
+    "circus's", "Khayyam's", "Myrtle's", "ventrals", "category's",
+    "opportunistic", "grovelling", "warier", "upchuck", "hairdresser's",
+    "Montanans", "jobber", "dazzle", "encirclement's", "muffin's", "coronets",
+    "focus's", "footfall's", "subjunctives", "late", "pedagogued",
+    "dignitaries", "content", "blockbusters", "reminiscent", "mayor",
+    "specifier", "extinction", "nutshell's", "catbird's", "bundle",
+    "gracefulness", "exceed", "estranges", "chancy", "bankrupted", "Avery",
+    "Barnett", "succulence", "stacking", "ensnare", "truck", "embargo",
+    "persecutes", "translation's", "muskrat's", "illumines", "undercoat's",
+    "fleecier", "brick", "qualities", "imprecision", "reprisals", "discounts",
+    "harmonics", "Mann's", "terrorism", "interminable", "Santiago's",
+    "deepness", "tramples", "golder", "voyeurism's", "tent", "particle's",
+    "minuend", "waxwings", "knobby", "trustee", "funnily", "hotheadedness's",
+    "Kristin", "what", "bite", "murmur's", "pustule's", "weeknights",
+    "rocked", "athlete", "ventilates", "impresses", "daguerreotyping",
+    "Gross", "gambols", "villa", "maraud", "disapproval", "apostrophe's",
+    "sheaf", "noisemaker's", "autonomy's", "massing", "daemon's", "Thackeray",
+    "fermenting", "whammy", "philosophise", "empathy", "calamities",
+    "sunbathe", "Qom", "yahoo's", "coxcomb's", "move", "school's",
+    "rainmakers", "shipwreck", "potbelly's", "courageously", "current",
+    "Aleut", "treaties", "U", "always", "Bosch", "impregnating", "bud's",
+    "carat", "centrists", "acquaintance's", "convoy's", "chichis",
+    "restraint's", "Cosby", "factotums", "handshaking", "paragon's",
+    "mileages", "Tammie", "cartoonists", "lemmas", "lowliness's", "onion's",
+    "E's", "Bible", "Cranmer", "fob's", "minks", "overstocking", "Willamette",
+    "needle's", "scuppers", "Carborundum", "upwardly", "tallies", "aptitude",
+    "synod", "nasturtium's", "Pensacola", "snappish", "merino", "sups",
+    "fingerboard's", "prodigy's", "narcissism's", "substantial", "lug",
+    "establishing", "Vergil's", "patrimonies", "shorted", "forestation",
+    "undeniable", "Katmandu", "lamination", "trollop's", "odd", "stanza",
+    "paraplegic", "melanin", "Rico", "foreman", "stereotypes", "affinity's",
+    "cleansing", "sautéing", "epochs", "crooners", "manicured", "undisclosed",
+    "propel", "usage", "Alioth's", "Aurelia's", "peruse", "Vassar's",
+    "Demosthenes's", "Brazos", "supermarket", "scribbles", "Jekyll's",
+    "discomfort's", "mastiffs", "ballasting", "Figueroa", "turnstiles",
+    "convince", "Shelton's", "Gustavo", "shunting", "Fujitsu's", "fining's",
+    "hippos", "dam's", "expressionists", "peewee", "troop's"
+  ]
+  WORDS_SIZE = WORDS.size
+
+  def random_word
+    return WORDS[rand(WORDS_SIZE)]
+  end
+
+  def random_sentence(max_len)
+    sentence = ""
+    (1 + rand(max_len)).times { sentence << " " << random_word }
+    return sentence
+  end
+
+  def random_doc(max_fields = 10, max_elements = 10, max_len = 100)
+    doc = {}
+    (1 + rand(max_fields)).times do
+      field = random_word.intern
+      elem_count = rand(max_elements) + 1
+      if (elem_count == 1)
+        doc[field] = random_sentence(max_len)
+      else
+        doc[field] = []
+        elem_count.times { doc[field] << random_sentence(max_len)}
+      end
+    end
+    return doc
+  end
 end
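
The rewritten test_add_document above also demonstrates the new document model: rather than building Document and Field objects, IndexWriter#<< accepts a plain Hash of fields, an Array of strings, or a single String. A short sketch of the three forms, mirroring the assertions in the hunk and continuing the setup from the earlier sketch:

    iw = IndexWriter.new(:dir => Ferret::Store::RAMDirectory.new,
                         :create => true)
    iw << {:title => "first doc", :content => ["contents of", "first doc"]}  # Hash document
    iw << ["contents of", "second doc"]                                      # Array document
    iw << "contents of third doc"                                           # String document
    iw.doc_count   # => 3, as asserted in the test above
    iw.close()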
@@ -1,8 +1,4 @@
-require File.dirname(__FILE__) + "/../../test_helper"
-
-
 module IndexTestHelper
-  include Ferret::Document
   include Ferret::Index
   include Ferret::Analysis
   include Ferret::Search
@@ -16,23 +12,34 @@ module IndexTestHelper
   BINARY_DATA = IndexTestHelper.make_binary(256)
   COMPRESSED_BINARY_DATA = IndexTestHelper.make_binary(56)
 
-  def IndexTestHelper.prepare_document
-    doc = Document.new()
-
-    doc << Field.new("text_field1", "field one text", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO)
-    doc << Field.new("text_field2", "field field field two text", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    doc << Field.new("key_field", "keyword", Field::Store::YES, Field::Index::UNTOKENIZED)
-    doc << Field.new("unindexed_field", "unindexed field text", Field::Store::YES, Field::Index::NO)
-    doc << Field.new("unstored_field1", "unstored field text one", Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::NO)
-    doc << Field.new("unstored_field2", "unstored field text two", Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::YES)
-    doc << Field.new("compressed_field", "compressed text", Field::Store::COMPRESS, Field::Index::TOKENIZED, Field::TermVector::YES)
-    doc << Field.new_binary_field("binary_field", BINARY_DATA, Field::Store::YES)
-    doc << Field.new_binary_field("compressed_binary_field", COMPRESSED_BINARY_DATA, Field::Store::COMPRESS)
-    return doc
+  def IndexTestHelper.prepare_document(dir)
+    fis = FieldInfos.new
+    fis.add_field(:text_field1, :term_vector => :no)
+    fis.add_field(:text_field2)
+    fis.add_field(:key_field, :index => :untokenized)
+    fis.add_field(:unindexed_field, :index => :no)
+    fis.add_field(:unstored_field1, :store => :no, :term_vector => :no)
+    fis.add_field(:unstored_field2, :store => :no, :term_vector => :yes)
+    fis.add_field(:compressed_field, :store => :compressed, :term_vector => :yes)
+    fis.add_field(:binary_field, :index => :no, :term_vector => :no)
+    fis.add_field(:compressed_binary_field, :store => :compressed,
+                  :index => :no, :term_vector => :no)
+    doc = {
+      :text_field1 => "field one text",
+      :text_field2 => "field field field two text",
+      :key_field => "keyword",
+      :unindexed_field => "unindexed field text",
+      :unstored_field1 => "unstored field text one",
+      :unstored_field2 => "unstored field text two",
+      :compressed_field => "compressed text",
+      :binary_field => BINARY_DATA,
+      :compressed_binary_field => COMPRESSED_BINARY_DATA
+    }
+    return doc, fis
   end
 
   def IndexTestHelper.prepare_documents
-    data = [
+    [
       ["apple", "green"],
       ["apple", "red"],
       ["orange", "orange"],
@@ -41,212 +48,268 @@ module IndexTestHelper
41
48
  ["mandarin", "orange"],
42
49
  ["peach", "orange"],
43
50
  ["apricot", "orange"]
44
- ]
45
-
46
- docs = []
47
-
48
- data.each do |food|
49
- doc = Document.new()
50
- doc << Field.new("name", food[0], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
51
- doc << Field.new("colour", food[1], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
52
- docs << doc
53
- end
54
- return docs
55
- end
56
-
57
- def IndexTestHelper.write_document(dir, doc, segment="test", analyzer = WhiteSpaceAnalyzer.new(), similarity = Similarity.default())
58
- writer = DocumentWriter.new(dir, analyzer, similarity, 50)
59
- writer.add_document(segment, doc)
51
+ ].map { |food| {"name" => food[0], "colour" => food[1]} }
60
52
  end
61
53
 
62
54
  def IndexTestHelper.prepare_book_list
63
55
  books = [
64
- {"author" => "P.H. Newby", "title" => "Something To Answer For", "year" => "1969"},
65
- {"author" => "Bernice Rubens", "title" => "The Elected Member", "year" => "1970"},
66
- {"author" => "V. S. Naipaul", "title" => "In a Free State", "year" => "1971"},
67
- {"author" => "John Berger", "title" => "G", "year" => "1972"},
68
- {"author" => "J. G. Farrell", "title" => "The Siege of Krishnapur", "year" => "1973"},
69
- {"author" => "Stanley Middleton", "title" => "Holiday", "year" => "1974"},
70
- {"author" => "Nadine Gordimer", "title" => "The Conservationist", "year" => "1974"},
71
- {"author" => "Ruth Prawer Jhabvala", "title" => "Heat and Dust", "year" => "1975"},
72
- {"author" => "David Storey", "title" => "Saville", "year" => "1976"},
73
- {"author" => "Paul Scott", "title" => "Staying On", "year" => "1977"},
74
- {"author" => "Iris Murdoch", "title" => "The Sea", "year" => "1978"},
75
- {"author" => "Penelope Fitzgerald", "title" => "Offshore", "year" => "1979"},
76
- {"author" => "William Golding", "title" => "Rites of Passage", "year" => "1980"},
77
- {"author" => "Salman Rushdie", "title" => "Midnight's Children", "year" => "1981"},
78
- {"author" => "Thomas Keneally", "title" => "Schindler's Ark", "year" => "1982"},
79
- {"author" => "J. M. Coetzee", "title" => "Life and Times of Michael K", "year" => "1983"},
80
- {"author" => "Anita Brookner", "title" => "Hotel du Lac", "year" => "1984"},
81
- {"author" => "Keri Hulme", "title" => "The Bone People", "year" => "1985"},
82
- {"author" => "Kingsley Amis", "title" => "The Old Devils", "year" => "1986"},
83
- {"author" => "Penelope Lively", "title" => "Moon Tiger", "year" => "1987"},
84
- {"author" => "Peter Carey", "title" => "Oscar and Lucinda", "year" => "1988"},
85
- {"author" => "Kazuo Ishiguro", "title" => "The Remains of the Day", "year" => "1989"},
86
- {"author" => "A. S. Byatt", "title" => "Possession", "year" => "1990"},
87
- {"author" => "Ben Okri", "title" => "The Famished Road", "year" => "1991"},
88
- {"author" => "Michael Ondaatje", "title" => "The English Patient", "year" => "1992"},
89
- {"author" => "Barry Unsworth", "title" => "Sacred Hunger", "year" => "1992"},
90
- {"author" => "Roddy Doyle", "title" => "Paddy Clarke Ha Ha Ha", "year" => "1993"},
91
- {"author" => "James Kelman", "title" => "How Late It Was, How Late", "year" => "1994"},
92
- {"author" => "Pat Barker", "title" => "The Ghost Road", "year" => "1995"},
93
- {"author" => "Graham Swift", "title" => "Last Orders", "year" => "1996"},
94
- {"author" => "Arundati Roy", "title" => "The God of Small Things", "year" => "1997"},
95
- {"author" => "Ian McEwan", "title" => "Amsterdam", "year" => "1998"},
96
- {"author" => "J. M. Coetzee", "title" => "Disgrace", "year" => "1999"},
97
- {"author" => "Margaret Atwood", "title" => "The Blind Assassin", "year" => "2000"},
98
- {"author" => "Peter Carey", "title" => "True History of the Kelly Gang", "year" => "2001"},
99
- {"author" => "Yann Martel", "title" => "The Life of Pi", "year" => "2002"},
100
- {"author" => "DBC Pierre", "title" => "Vernon God Little", "year" => "2003"}
56
+ {"author" => "P.H. Newby",
57
+ "title" => "Something To Answer For",
58
+ "year" => "1969"},
59
+ {"author" => "Bernice Rubens",
60
+ "title" => "The Elected Member",
61
+ "year" => "1970"},
62
+ {"author" => "V. S. Naipaul",
63
+ "title" => "In a Free State",
64
+ "year" => "1971"},
65
+ {"author" => "John Berger",
66
+ "title" => "G",
67
+ "year" => "1972"},
68
+ {"author" => "J. G. Farrell",
69
+ "title" => "The Siege of Krishnapur",
70
+ "year" => "1973"},
71
+ {"author" => "Stanley Middleton",
72
+ "title" => "Holiday",
73
+ "year" => "1974"},
74
+ {"author" => "Nadine Gordimer",
75
+ "title" => "The Conservationist",
76
+ "year" => "1974"},
77
+ {"author" => "Ruth Prawer Jhabvala",
78
+ "title" => "Heat and Dust",
79
+ "year" => "1975"},
80
+ {"author" => "David Storey",
81
+ "title" => "Saville",
82
+ "year" => "1976"},
83
+ {"author" => "Paul Scott",
84
+ "title" => "Staying On",
85
+ "year" => "1977"},
86
+ {"author" => "Iris Murdoch",
87
+ "title" => "The Sea",
88
+ "year" => "1978"},
89
+ {"author" => "Penelope Fitzgerald",
90
+ "title" => "Offshore",
91
+ "year" => "1979"},
92
+ {"author" => "William Golding",
93
+ "title" => "Rites of Passage",
94
+ "year" => "1980"},
95
+ {"author" => "Salman Rushdie",
96
+ "title" => "Midnight's Children",
97
+ "year" => "1981"},
98
+ {"author" => "Thomas Keneally",
99
+ "title" => "Schindler's Ark",
100
+ "year" => "1982"},
101
+ {"author" => "J. M. Coetzee",
102
+ "title" => "Life and Times of Michael K",
103
+ "year" => "1983"},
104
+ {"author" => "Anita Brookner",
105
+ "title" => "Hotel du Lac",
106
+ "year" => "1984"},
107
+ {"author" => "Keri Hulme",
108
+ "title" => "The Bone People",
109
+ "year" => "1985"},
110
+ {"author" => "Kingsley Amis",
111
+ "title" => "The Old Devils",
112
+ "year" => "1986"},
113
+ {"author" => "Penelope Lively",
114
+ "title" => "Moon Tiger",
115
+ "year" => "1987"},
116
+ {"author" => "Peter Carey",
117
+ "title" => "Oscar and Lucinda",
118
+ "year" => "1988"},
119
+ {"author" => "Kazuo Ishiguro",
120
+ "title" => "The Remains of the Day",
121
+ "year" => "1989"},
122
+ {"author" => "A. S. Byatt",
123
+ "title" => "Possession",
124
+ "year" => "1990"},
125
+ {"author" => "Ben Okri",
126
+ "title" => "The Famished Road",
127
+ "year" => "1991"},
128
+ {"author" => "Michael Ondaatje",
129
+ "title" => "The English Patient",
130
+ "year" => "1992"},
131
+ {"author" => "Barry Unsworth",
132
+ "title" => "Sacred Hunger",
133
+ "year" => "1992"},
134
+ {"author" => "Roddy Doyle",
135
+ "title" => "Paddy Clarke Ha Ha Ha",
136
+ "year" => "1993"},
137
+ {"author" => "James Kelman",
138
+ "title" => "How Late It Was, How Late",
139
+ "year" => "1994"},
140
+ {"author" => "Pat Barker",
141
+ "title" => "The Ghost Road",
142
+ "year" => "1995"},
143
+ {"author" => "Graham Swift",
144
+ "title" => "Last Orders",
145
+ "year" => "1996"},
146
+ {"author" => "Arundati Roy",
147
+ "title" => "The God of Small Things",
148
+ "year" => "1997"},
149
+ {"author" => "Ian McEwan",
150
+ "title" => "Amsterdam",
151
+ "year" => "1998"},
152
+ {"author" => "J. M. Coetzee",
153
+ "title" => "Disgrace",
154
+ "year" => "1999"},
155
+ {"author" => "Margaret Atwood",
156
+ "title" => "The Blind Assassin",
157
+ "year" => "2000"},
158
+ {"author" => "Peter Carey",
159
+ "title" => "True History of the Kelly Gang",
160
+ "year" => "2001"},
161
+ {"author" => "Yann Martel",
162
+ "title" => "The Life of Pi",
163
+ "year" => "2002"},
164
+ {"author" => "DBC Pierre",
165
+ "title" => "Vernon God Little",
166
+ "year" => "2003"}
101
167
  ]
102
- docs = []
103
-
104
- books.each do |book|
105
- doc = Document.new()
106
- doc << Field.new("author", book["author"], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
107
- doc << Field.new("title", book["title"], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
108
- doc << Field.new("year", book["year"], Field::Store::YES, Field::Index::NO, Field::TermVector::NO)
109
- docs << doc
110
- end
111
- return docs
112
168
  end
113
169
 
114
- IR_TEST_DOC_CNT = 64
115
-
116
- def IndexTestHelper.prepare_ir_test_docs()
117
- body = "body"
118
- title = "title"
119
- author = "author"
120
- text = "text"
121
- year = "year"
122
- changing_field = "changing_field"
170
+ def self.prepare_ir_test_fis
171
+ fis = FieldInfos.new
172
+ fis.add_field(:body)
173
+ fis.add_field(:changing_field, :term_vector => :no)
174
+ fis.add_field(:title, :index => :untokenized, :term_vector => :with_offsets)
175
+ fis.add_field(:author, :term_vector => :with_positions)
176
+ fis.add_field(:year, :index => :no, :term_vector => :no)
177
+ fis.add_field(:text, :store => :no, :term_vector => :no)
178
+ end
123
179
 
124
- docs = Array.new(IR_TEST_DOC_CNT)
- docs[0] = Document.new()
- docs[0] << Field.new(body, "Where is Wally", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[0] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO)
- docs[1] = Document.new()
- docs[1] << Field.new(body, "Some Random Sentence read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[2] = Document.new()
- docs[2] << Field.new(body, "Some read Random Sentence read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[3] = Document.new()
- docs[3] << Field.new(title, "War And Peace", Field::Store::YES, Field::Index::UNTOKENIZED, Field::TermVector::WITH_OFFSETS)
- docs[3] << Field.new(body, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[3] << Field.new(author, "Leo Tolstoy", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS)
- docs[3] << Field.new(year, "1865", Field::Store::YES, Field::Index::NO, Field::TermVector::NO)
- docs[3] << Field.new(text, "more text which is not stored", Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::NO)
- docs[4] = Document.new()
- docs[4] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[5] = Document.new()
- docs[5] << Field.new(body, "Here's Wally", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[6] = Document.new()
- docs[6] << Field.new(body, "Some Random Sentence read read read read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[7] = Document.new()
- docs[7] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[8] = Document.new()
- docs[8] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[9] = Document.new()
- docs[9] << Field.new(body, "read Some Random Sentence read this will be used after unfinished next position read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[10] = Document.new()
- docs[10] << Field.new(body, "Some read Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[10] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::YES)
- docs[11] = Document.new()
- docs[11] << Field.new(body, "And here too. Well, maybe Not", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[12] = Document.new()
- docs[12] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[13] = Document.new()
- docs[13] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[14] = Document.new()
- docs[14] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[15] = Document.new()
- docs[15] << Field.new(body, "Some read Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[16] = Document.new()
- docs[16] << Field.new(body, "Some Random read read Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[17] = Document.new()
- docs[17] << Field.new(body, "Some Random read Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[17] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS)
- docs[18] = Document.new()
- docs[18] << Field.new(body, "Wally Wally Wally", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[19] = Document.new()
- docs[19] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[19] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_OFFSETS)
- docs[20] = Document.new()
- docs[20] << Field.new(body, "Wally is where Wally usually likes to go. Wally Mart! Wally likes shopping there for Where's Wally books. Wally likes to read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[20] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[21] = Document.new()
- docs[21] << Field.new(body, "Some Random Sentence read read read and more read read read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
- docs[21] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO)
+ INDEX_TEST_DOC_COUNT = 64
+ def self.prepare_ir_test_docs
+ docs = []
+ docs[0] = {
+ :body => "Where is Wally",
+ :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+ "word3 word3",
+ }
+ docs[1] = {
+ :body => "Some Random Sentence read"
+ }
+ docs[2] = {
+ :body => "Some read Random Sentence read"
+ }
+ docs[3] = {
+ :title => "War And Peace",
+ :body => "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3",
+ :author => "Leo Tolstoy",
+ :year => "1865",
+ :text => "more text which is not stored"
+ }
+ docs[4] = {
+ :body => "Some Random Sentence"
+ }
+ docs[5] = {
+ :body => "Here's Wally"
+ }
+ docs[6] = {
+ :body => "Some Random Sentence read read read read"
+ }
+ docs[7] = {
+ :body => "Some Random Sentence"
+ }
+ docs[8] = {
+ :body => "Some Random Sentence"
+ }
+ docs[9] = {
+ :body => "read Some Random Sentence read this will be used after " +
+ "unfinished next position read"
+ }
+ docs[10] = {
+ :body => "Some read Random Sentence",
+ :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+ "word3 word3"
+ }
+ docs[11] = {
+ :body => "And here too. Well, maybe Not"
+ }
+ docs[12] = {
+ :body => "Some Random Sentence"
+ }
+ docs[13] = {
+ :body => "Some Random Sentence"
+ }
+ docs[14] = {
+ :body => "Some Random Sentence"
+ }
+ docs[15] = {
+ :body => "Some Random Sentence"
+ }
+ docs[16] = {
+ :body => "Some Random read read Sentence"
+ }
+ docs[17] = {
+ :body => "Some Random read Sentence",
+ :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+ "word3 word3"
+ }
+ docs[18] = {
+ :body => "Wally Wally Wally"
+ }
+ docs[19] = {
+ :body => "Some Random Sentence",
+ :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+ "word3 word3"
+ }
+ docs[20] = {
+ :body => "Wally is where Wally usually likes to go. Wally Mart! Wally " +
+ "likes shopping there for Where's Wally books. Wally likes " +
+ "to read",
+ :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+ "word3 word3"
+ }
+ docs[21] = {
+ :body => "Some Random Sentence read read read and more read read read",
+ :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+ "word3 word3"
+ }

  buf = ""
  21.times { buf << "skip " }
- 22.upto(IR_TEST_DOC_CNT) do |i|
+ 22.upto(INDEX_TEST_DOC_COUNT-1) do |i|
  buf << "skip "
- docs[i] = Document.new()
- docs[i] << Field.new(text, buf.clone, Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
+ docs[i] = {:text => buf.clone}
  end
  return docs
  end

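Note that the rewritten fixtures are plain Ruby hashes rather than Document/Field graphs, which matches how documents are fed to an index in 0.10.0. Here is a short usage sketch under the assumption that the enclosing class is still named IndexTestHelper; the in-memory index and the query string are illustrative only.

    require 'ferret'

    index = Ferret::Index::Index.new        # no :path, so a RAM-backed index
    IndexTestHelper.prepare_ir_test_docs.each do |doc|
      index << doc                          # each document is just a Hash
    end
    index.search_each('body:Wally') do |doc_id, score|
      puts "doc #{doc_id} scored #{score}"
    end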
- def IndexTestHelper.prepare_search_docs
- data = [
- {"date" => "20050930", "field" => "word1",
- "cat" => "cat1/"},
- {"date" => "20051001", "field" => "word1 word2 the quick brown fox",
- "cat" => "cat1/sub1"},
- {"date" => "20051002", "field" => "word1 word3",
- "cat" => "cat1/sub1/subsub1"},
- {"date" => "20051003", "field" => "word1 word3",
- "cat" => "cat1/sub2"},
- {"date" => "20051004", "field" => "word1 word2",
- "cat" => "cat1/sub2/subsub2"},
- {"date" => "20051005", "field" => "word1",
- "cat" => "cat2/sub1"},
- {"date" => "20051006", "field" => "word1 word3",
- "cat" => "cat2/sub1"},
- {"date" => "20051007", "field" => "word1",
- "cat" => "cat2/sub1"},
- {"date" => "20051008", "field" => "word1 word2 word3 the fast brown fox",
- "cat" => "cat2/sub1"},
- {"date" => "20051009", "field" => "word1",
- "cat" => "cat3/sub1"},
- {"date" => "20051010", "field" => "word1",
- "cat" => "cat3/sub1"},
- {"date" => "20051011", "field" => "word1 word3 the quick red fox",
- "cat" => "cat3/sub1"},
- {"date" => "20051012", "field" => "word1",
- "cat" => "cat3/sub1"},
- {"date" => "20051013", "field" => "word1",
- "cat" => "cat1/sub2"},
- {"date" => "20051014", "field" => "word1 word3 the quick hairy fox",
- "cat" => "cat1/sub1"},
- {"date" => "20051015", "field" => "word1",
- "cat" => "cat1/sub2/subsub1"},
- {"date" => "20051016",
- "field" => "word1 the quick fox is brown and hairy and a little red",
- "cat" => "cat1/sub1/subsub2"},
- {"date" => "20051017", "field" => "word1 the brown fox is quick and red",
- "cat" => "cat1/"}
- ]
-
- docs = []
- data.each_with_index do |fields, i|
- doc = Document.new()
- doc.boost = i+1
+ INDEX_TEST_DOCS = self.prepare_ir_test_docs()
+ INDEX_TEST_FIS = self.prepare_ir_test_fis()

- fields.each_pair do |field, text|
- doc << Field.new(field, text, Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO, false)
- end
- docs << doc
+ def self.prepare_search_docs
+ i = 1
+ [
+ ["20050930", "cat1/", "word1" ],
+ ["20051001", "cat1/sub1", "word1 word2 the quick brown fox" ],
+ ["20051002", "cat1/sub1/subsub1", "word1 word3" ],
+ ["20051003", "cat1/sub2", "word1 word3" ],
+ ["20051004", "cat1/sub2/subsub2", "word1 word2" ],
+ ["20051005", "cat2/sub1", "word1" ],
+ ["20051006", "cat2/sub1", "word1 word3" ],
+ ["20051007", "cat2/sub1", "word1" ],
+ ["20051008", "cat2/sub1", "word1 word2 word3 the fast brown fox"],
+ ["20051009", "cat3/sub1", "word1" ],
+ ["20051010", "cat3/sub1", "word1" ],
+ ["20051011", "cat3/sub1", "word1 word3 the quick red fox" ],
+ ["20051012", "cat3/sub1", "word1" ],
+ ["20051013", "cat1/sub2", "word1" ],
+ ["20051014", "cat1/sub1", "word1 word3 the quick hairy fox" ],
+ ["20051015", "cat1/sub2/subsub1", "word1" ],
+ ["20051016", "cat1/sub1/subsub2",
+ "word1 the quick fox is brown and hairy and a little red" ],
+ ["20051017", "cat1/",
+ "word1 the brown fox is quick and red" ]
+ ].map do |date, category, field|
+ doc = Ferret::Document.new(i)
+ i += 1
+ doc[:date] = date
+ doc[:category] = category
+ doc[:field] = field
+ doc
  end
- return docs
- end
-
- def IndexTestHelper.explain (query, searcher, field)
- top_docs = searcher.search(query)
- top_docs.score_docs.each { |sd|
- puts "\nDoc #{sd.doc}: #{searcher.doc(sd.doc)[field]}\n#{searcher.explain(query, sd.doc).to_s}\n"
- }
  end

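The search fixtures now build Ferret::Document objects, which behave like hashes with an attached boost; the counter passed to the constructor above makes each successive document score higher. A minimal sketch of that behaviour, assuming Document keeps the Hash-like accessors suggested by the lib/ferret/document.rb changes in this release (the return values shown are expectations, not output taken from this diff):

    require 'ferret'

    doc = Ferret::Document.new(2.0)  # boost is the constructor argument
    doc[:date]  = "20051008"
    doc[:field] = "word1 word2"
    doc.boost                        # => 2.0
    doc[:date]                       # => "20051008"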
+ SEARCH_TEST_DOCS = self.prepare_search_docs()
  end
-