RubyGems - ferret - Versions diffs - 0.9.6 → 0.10.0 - Mend

ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (295)

data/MIT-LICENSE +1 -1
data/README +12 -24
data/Rakefile +38 -54
data/TODO +14 -17
data/ext/analysis.c +982 -823
data/ext/analysis.h +133 -76
data/ext/array.c +96 -58
data/ext/array.h +40 -13
data/ext/bitvector.c +476 -118
data/ext/bitvector.h +264 -22
data/ext/compound_io.c +217 -229
data/ext/defines.h +49 -0
data/ext/document.c +107 -317
data/ext/document.h +31 -65
data/ext/except.c +81 -36
data/ext/except.h +117 -55
data/ext/extconf.rb +2 -9
data/ext/ferret.c +211 -104
data/ext/ferret.h +22 -11
data/ext/filter.c +97 -82
data/ext/fs_store.c +348 -367
data/ext/global.c +226 -188
data/ext/global.h +44 -26
data/ext/hash.c +474 -391
data/ext/hash.h +441 -68
data/ext/hashset.c +124 -96
data/ext/hashset.h +169 -20
data/ext/helper.c +56 -5
data/ext/helper.h +7 -0
data/ext/inc/lang.h +29 -49
data/ext/inc/threading.h +31 -0
data/ext/ind.c +288 -278
data/ext/ind.h +68 -0
data/ext/index.c +5688 -0
data/ext/index.h +663 -616
data/ext/lang.h +29 -49
data/ext/libstemmer.c +3 -3
data/ext/mem_pool.c +84 -0
data/ext/mem_pool.h +35 -0
data/ext/posh.c +1006 -0
data/ext/posh.h +1007 -0
data/ext/priorityqueue.c +117 -194
data/ext/priorityqueue.h +135 -39
data/ext/q_boolean.c +1305 -1108
data/ext/q_const_score.c +106 -93
data/ext/q_filtered_query.c +138 -135
data/ext/q_fuzzy.c +206 -242
data/ext/q_match_all.c +94 -80
data/ext/q_multi_term.c +663 -0
data/ext/q_parser.c +667 -593
data/ext/q_phrase.c +992 -555
data/ext/q_prefix.c +72 -61
data/ext/q_range.c +235 -210
data/ext/q_span.c +1480 -1166
data/ext/q_term.c +273 -246
data/ext/q_wildcard.c +127 -114
data/ext/r_analysis.c +1720 -711
data/ext/r_index.c +3049 -0
data/ext/r_qparser.c +433 -146
data/ext/r_search.c +2934 -1993
data/ext/r_store.c +372 -143
data/ext/r_utils.c +941 -0
data/ext/ram_store.c +330 -326
data/ext/search.c +1291 -668
data/ext/search.h +403 -702
data/ext/similarity.c +91 -113
data/ext/similarity.h +45 -30
data/ext/sort.c +721 -484
data/ext/stopwords.c +361 -273
data/ext/store.c +556 -58
data/ext/store.h +706 -126
data/ext/tags +3578 -2780
data/ext/term_vectors.c +352 -0
data/ext/threading.h +31 -0
data/ext/win32.h +54 -0
data/lib/ferret.rb +5 -17
data/lib/ferret/document.rb +130 -2
data/lib/ferret/index.rb +577 -26
data/lib/ferret/number_tools.rb +157 -0
data/lib/ferret_version.rb +3 -0
data/test/test_helper.rb +5 -13
data/test/unit/analysis/tc_analyzer.rb +513 -1
data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
data/test/unit/index/tc_index.rb +183 -240
data/test/unit/index/tc_index_reader.rb +312 -479
data/test/unit/index/tc_index_writer.rb +397 -13
data/test/unit/index/th_doc.rb +269 -206
data/test/unit/query_parser/tc_query_parser.rb +40 -33
data/test/unit/search/tc_filter.rb +59 -71
data/test/unit/search/tc_fuzzy_query.rb +24 -16
data/test/unit/search/tc_index_searcher.rb +23 -201
data/test/unit/search/tc_multi_searcher.rb +78 -226
data/test/unit/search/tc_search_and_sort.rb +93 -81
data/test/unit/search/tc_sort.rb +23 -23
data/test/unit/search/tc_sort_field.rb +7 -7
data/test/unit/search/tc_spans.rb +51 -47
data/test/unit/search/tm_searcher.rb +339 -0
data/test/unit/store/tc_fs_store.rb +1 -1
data/test/unit/store/tm_store_lock.rb +3 -3
data/test/unit/tc_document.rb +81 -0
data/test/unit/ts_analysis.rb +1 -1
data/test/unit/ts_utils.rb +1 -1
data/test/unit/utils/tc_bit_vector.rb +288 -0
data/test/unit/utils/tc_number_tools.rb +117 -0
data/test/unit/utils/tc_priority_queue.rb +106 -0
metadata +140 -301
data/CHANGELOG +0 -9
data/ext/dummy.exe +0 -0
data/ext/field.c +0 -408
data/ext/frtio.h +0 -13
data/ext/inc/except.h +0 -90
data/ext/index_io.c +0 -382
data/ext/index_rw.c +0 -2658
data/ext/lang.c +0 -41
data/ext/nix_io.c +0 -134
data/ext/q_multi_phrase.c +0 -380
data/ext/r_doc.c +0 -582
data/ext/r_index_io.c +0 -1021
data/ext/r_term.c +0 -219
data/ext/term.c +0 -820
data/ext/termdocs.c +0 -611
data/ext/vector.c +0 -637
data/ext/w32_io.c +0 -150
data/lib/ferret/analysis.rb +0 -11
data/lib/ferret/analysis/analyzers.rb +0 -112
data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
data/lib/ferret/analysis/token.rb +0 -100
data/lib/ferret/analysis/token_filters.rb +0 -86
data/lib/ferret/analysis/token_stream.rb +0 -26
data/lib/ferret/analysis/tokenizers.rb +0 -112
data/lib/ferret/analysis/word_list_loader.rb +0 -27
data/lib/ferret/document/document.rb +0 -152
data/lib/ferret/document/field.rb +0 -312
data/lib/ferret/index/compound_file_io.rb +0 -338
data/lib/ferret/index/document_writer.rb +0 -289
data/lib/ferret/index/field_infos.rb +0 -279
data/lib/ferret/index/fields_io.rb +0 -181
data/lib/ferret/index/index.rb +0 -675
data/lib/ferret/index/index_file_names.rb +0 -33
data/lib/ferret/index/index_reader.rb +0 -503
data/lib/ferret/index/index_writer.rb +0 -534
data/lib/ferret/index/multi_reader.rb +0 -377
data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
data/lib/ferret/index/segment_infos.rb +0 -130
data/lib/ferret/index/segment_merge_info.rb +0 -49
data/lib/ferret/index/segment_merge_queue.rb +0 -16
data/lib/ferret/index/segment_merger.rb +0 -358
data/lib/ferret/index/segment_reader.rb +0 -412
data/lib/ferret/index/segment_term_enum.rb +0 -169
data/lib/ferret/index/segment_term_vector.rb +0 -58
data/lib/ferret/index/term.rb +0 -53
data/lib/ferret/index/term_buffer.rb +0 -83
data/lib/ferret/index/term_doc_enum.rb +0 -291
data/lib/ferret/index/term_enum.rb +0 -52
data/lib/ferret/index/term_info.rb +0 -37
data/lib/ferret/index/term_infos_io.rb +0 -321
data/lib/ferret/index/term_vector_offset_info.rb +0 -20
data/lib/ferret/index/term_vectors_io.rb +0 -553
data/lib/ferret/query_parser.rb +0 -312
data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
data/lib/ferret/search.rb +0 -50
data/lib/ferret/search/boolean_clause.rb +0 -100
data/lib/ferret/search/boolean_query.rb +0 -299
data/lib/ferret/search/boolean_scorer.rb +0 -294
data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
data/lib/ferret/search/conjunction_scorer.rb +0 -99
data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
data/lib/ferret/search/explanation.rb +0 -41
data/lib/ferret/search/field_cache.rb +0 -215
data/lib/ferret/search/field_doc.rb +0 -31
data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
data/lib/ferret/search/filter.rb +0 -11
data/lib/ferret/search/filtered_query.rb +0 -130
data/lib/ferret/search/filtered_term_enum.rb +0 -79
data/lib/ferret/search/fuzzy_query.rb +0 -154
data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
data/lib/ferret/search/hit_collector.rb +0 -34
data/lib/ferret/search/hit_queue.rb +0 -11
data/lib/ferret/search/index_searcher.rb +0 -200
data/lib/ferret/search/match_all_query.rb +0 -104
data/lib/ferret/search/multi_phrase_query.rb +0 -216
data/lib/ferret/search/multi_searcher.rb +0 -261
data/lib/ferret/search/multi_term_query.rb +0 -65
data/lib/ferret/search/non_matching_scorer.rb +0 -22
data/lib/ferret/search/phrase_positions.rb +0 -55
data/lib/ferret/search/phrase_query.rb +0 -214
data/lib/ferret/search/phrase_scorer.rb +0 -152
data/lib/ferret/search/prefix_query.rb +0 -54
data/lib/ferret/search/query.rb +0 -140
data/lib/ferret/search/query_filter.rb +0 -51
data/lib/ferret/search/range_filter.rb +0 -103
data/lib/ferret/search/range_query.rb +0 -139
data/lib/ferret/search/req_excl_scorer.rb +0 -125
data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
data/lib/ferret/search/score_doc.rb +0 -38
data/lib/ferret/search/score_doc_comparator.rb +0 -114
data/lib/ferret/search/scorer.rb +0 -91
data/lib/ferret/search/similarity.rb +0 -278
data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
data/lib/ferret/search/sort.rb +0 -112
data/lib/ferret/search/sort_comparator.rb +0 -60
data/lib/ferret/search/sort_field.rb +0 -91
data/lib/ferret/search/spans.rb +0 -12
data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
data/lib/ferret/search/spans/span_first_query.rb +0 -79
data/lib/ferret/search/spans/span_near_query.rb +0 -108
data/lib/ferret/search/spans/span_not_query.rb +0 -130
data/lib/ferret/search/spans/span_or_query.rb +0 -176
data/lib/ferret/search/spans/span_query.rb +0 -25
data/lib/ferret/search/spans/span_scorer.rb +0 -74
data/lib/ferret/search/spans/span_term_query.rb +0 -105
data/lib/ferret/search/spans/span_weight.rb +0 -84
data/lib/ferret/search/spans/spans_enum.rb +0 -44
data/lib/ferret/search/term_query.rb +0 -128
data/lib/ferret/search/term_scorer.rb +0 -183
data/lib/ferret/search/top_docs.rb +0 -36
data/lib/ferret/search/top_field_docs.rb +0 -17
data/lib/ferret/search/weight.rb +0 -54
data/lib/ferret/search/wildcard_query.rb +0 -26
data/lib/ferret/search/wildcard_term_enum.rb +0 -61
data/lib/ferret/stemmers.rb +0 -1
data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
data/lib/ferret/store.rb +0 -5
data/lib/ferret/store/buffered_index_io.rb +0 -190
data/lib/ferret/store/directory.rb +0 -141
data/lib/ferret/store/fs_store.rb +0 -381
data/lib/ferret/store/index_io.rb +0 -245
data/lib/ferret/store/ram_store.rb +0 -286
data/lib/ferret/utils.rb +0 -8
data/lib/ferret/utils/bit_vector.rb +0 -123
data/lib/ferret/utils/date_tools.rb +0 -138
data/lib/ferret/utils/number_tools.rb +0 -91
data/lib/ferret/utils/parameter.rb +0 -41
data/lib/ferret/utils/priority_queue.rb +0 -120
data/lib/ferret/utils/string_helper.rb +0 -47
data/lib/ferret/utils/thread_local.rb +0 -28
data/lib/ferret/utils/weak_key_hash.rb +0 -60
data/lib/rferret.rb +0 -37
data/rake_utils/code_statistics.rb +0 -106
data/test/benchmark/tb_ram_store.rb +0 -76
data/test/benchmark/tb_rw_vint.rb +0 -26
data/test/functional/thread_safety_index_test.rb +0 -81
data/test/functional/thread_safety_test.rb +0 -137
data/test/longrunning/tc_numbertools.rb +0 -60
data/test/longrunning/tm_store.rb +0 -19
data/test/unit/analysis/ctc_analyzer.rb +0 -532
data/test/unit/analysis/data/wordfile +0 -6
data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
data/test/unit/analysis/rtc_stop_filter.rb +0 -14
data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
data/test/unit/analysis/tc_token.rb +0 -25
data/test/unit/document/rtc_field.rb +0 -28
data/test/unit/document/tc_document.rb +0 -47
data/test/unit/document/tc_field.rb +0 -98
data/test/unit/index/rtc_compound_file_io.rb +0 -107
data/test/unit/index/rtc_field_infos.rb +0 -127
data/test/unit/index/rtc_fields_io.rb +0 -167
data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
data/test/unit/index/rtc_segment_infos.rb +0 -74
data/test/unit/index/rtc_segment_term_docs.rb +0 -17
data/test/unit/index/rtc_segment_term_enum.rb +0 -60
data/test/unit/index/rtc_segment_term_vector.rb +0 -71
data/test/unit/index/rtc_term_buffer.rb +0 -57
data/test/unit/index/rtc_term_info.rb +0 -19
data/test/unit/index/rtc_term_infos_io.rb +0 -192
data/test/unit/index/rtc_term_vectors_io.rb +0 -108
data/test/unit/index/tc_term.rb +0 -27
data/test/unit/index/tc_term_voi.rb +0 -18
data/test/unit/search/rtc_similarity.rb +0 -37
data/test/unit/search/rtc_sort_field.rb +0 -14
data/test/unit/search/tc_multi_searcher2.rb +0 -126
data/test/unit/store/rtc_fs_store.rb +0 -62
data/test/unit/store/rtc_ram_store.rb +0 -15
data/test/unit/store/rtm_store.rb +0 -150
data/test/unit/store/rtm_store_lock.rb +0 -2
data/test/unit/ts_document.rb +0 -2
data/test/unit/utils/rtc_bit_vector.rb +0 -73
data/test/unit/utils/rtc_date_tools.rb +0 -50
data/test/unit/utils/rtc_number_tools.rb +0 -59
data/test/unit/utils/rtc_parameter.rb +0 -40
data/test/unit/utils/rtc_priority_queue.rb +0 -62
data/test/unit/utils/rtc_string_helper.rb +0 -21
data/test/unit/utils/rtc_thread.rb +0 -61
data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
data/test/utils/number_to_spoken.rb +0 -132

data/test/unit/index/tc_index_writer.rb CHANGED

@@ -7,6 +7,8 @@ class IndexWriterTest < Test::Unit::TestCase
   def setup()
     @dir = Ferret::Store::RAMDirectory.new
+    fis = FieldInfos.new()
+    fis.create_index(@dir)
   end
   def tear_down()
@@ -18,7 +20,7 @@ class IndexWriterTest < Test::Unit::TestCase
     clock = @dir.make_lock(IndexWriter::COMMIT_LOCK_NAME)
     assert(! wlock.locked?)
     assert(! clock.locked?)
-    iw = IndexWriter.new(@dir, :create => true)
+    iw = IndexWriter.new(:dir => @dir, :create => true)
     assert(@dir.exists?("segments"))
     assert(wlock.locked?)
     iw.close()
@@ -28,26 +30,408 @@ class IndexWriterTest < Test::Unit::TestCase
   end
   def test_add_document
-    iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new(), :create => true)
-    doc = IndexTestHelper.prepare_document()
-    iw.add_document(doc)
+    iw = IndexWriter.new(:dir => @dir,
+                         :analyzer => StandardAnalyzer.new(),
+                         :create => true)
+    iw << {:title => "first doc", :content => ["contents of", "first doc"]}
     assert_equal(1, iw.doc_count)
+    iw << ["contents of", "second doc"]
+    assert_equal(2, iw.doc_count)
+    iw << "contents of third doc"
+    assert_equal(3, iw.doc_count)
     iw.close()
   end
-  def test_add_documents
-    iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new(), :create => true)
-    # uncomment the following line to see logging
-    #iw.info_stream = $stdout
+  def test_add_documents_fuzzy
+    iw = IndexWriter.new(:dir => @dir,
+                         :analyzer => StandardAnalyzer.new())
     iw.merge_factor = 3
-    iw.min_merge_docs = 3
-    docs = IndexTestHelper.prepare_book_list()
-    docs.each_with_index do |doc, i|
-      #puts "Index doc " + i.to_s
+    iw.max_buffered_docs = 3
+    # add 100 documents
+    100.times do
+      doc = random_doc()
       iw.add_document(doc)
     end
-    assert_equal(37, iw.doc_count)
+    assert_equal(100, iw.doc_count)
     iw.close()
   end
+  private
+  WORDS = [
+    "desirous", "hollowness's", "camp's", "Senegal", "broadcaster's",
+    "pecking", "Provence", "paternalism", "premonition", "Dumbo's",
+    "Darlene's", "Elbert's", "substrate", "Camille", "Menkalinan", "Cooper",
+    "decamps", "abatement's", "bindings", "scrubby", "subset", "ancestor's",
+    "pelagic", "abscissa", "loofah's", "gleans", "boudoir", "disappointingly",
+    "guardianship's", "settlers", "Mylar", "timetable's", "parabolic",
+    "madams", "bootlegger's", "monotonically", "gage", "Karyn's", "deposed",
+    "boozy", "swordfish's", "Chevron", "Victrola", "Tameka", "impels",
+    "carrels", "salami's", "celibate", "resistance's", "duration",
+    "abscissae", "Kilroy's", "corrosive", "flight's", "flapper", "scare",
+    "peppiest", "Pygmies", "Menzies", "wrist's", "enumerable", "housecoats",
+    "Khwarizmi's", "stampeding", "hungering", "steeping", "Yemenis",
+    "entangles", "solver", "mishapping", "Rand's", "ninety", "Boris",
+    "impedimenta", "predators", "ridge", "wretchedness's", "crapping", "Head",
+    "Edwards", "Claude's", "geodesics", "verities", "botch", "Short's",
+    "vellum's", "coruscates", "hydrogenates", "Haas's", "deceitfulness",
+    "cohort's", "Cepheus", "totes", "Cortez's", "napalm", "fruitcake",
+    "coordinated", "Coulomb", "desperation", "behoves", "contractor's",
+    "vacationed", "Wanamaker's", "leotard", "filtrated", "cringes", "Lugosi",
+    "sheath's", "orb", "jawed", "Isidro", "geophysics", "persons", "Asians",
+    "booze's", "eight's", "backslappers", "hankered", "dos", "helpings",
+    "tough", "interlarding", "gouger", "inflect", "Juneau's", "hay's",
+    "sardining", "spays", "Brandi", "depressant", "space", "assess",
+    "reappearance's", "Eli's", "Cote", "Enoch", "chants", "ruffianing",
+    "moralised", "unsuccessfully", "or", "Maryland's", "mildest", "unsafer",
+    "dutiful", "Pribilof", "teas", "vagued", "microbiologists", "hedgerow",
+    "speller's", "conservators", "catharsis", "drawbacks", "whooshed",
+    "unlawful", "revolve", "craftsmanship", "destabilise", "Margarito",
+    "Asgard's", "spawn's", "Annabel's", "canonicals", "buttermilk",
+    "exaltation's", "pothole", "reprints", "approximately", "homage",
+    "Wassermann's", "Atlantic's", "exacerbated", "Huerta", "keypunching",
+    "engagements", "dilate", "ponchos", "Helvetius", "Krakatoa", "basket's",
+    "stepmother", "schlock's", "drippings", "cardiology's", "northwesterly",
+    "cruddier", "poesies", "rustproof", "climb", "miscalled", "Belgians",
+    "Iago", "brownout", "nurseries", "hooliganism's", "concourse's",
+    "advocate", "sunrise's", "hyper", "octopus's", "erecting",
+    "counterattacking", "redesign", "studies", "nitrating", "milestone",
+    "bawls", "Nereid", "inferring", "Ontario's", "annexed", "treasury",
+    "cosmogony's", "scandalised", "shindig's", "detention's",
+    "Lollobrigida's", "eradicating", "magpie", "supertankers", "Adventist's",
+    "dozes", "Artaxerxes", "accumulate", "dankest", "telephony", "flows",
+    "Srivijaya's", "fourteen's", "antonym", "rancid", "briefing's",
+    "theologian", "Jacuzzi", "gracing", "chameleon's", "Brittney's",
+    "Pullmans", "Robitussin's", "jitterier", "mayonnaise's", "fort",
+    "closeouts", "amatory", "Drew's", "cockfight", "pyre", "Laura's",
+    "Bradley's", "obstructionists", "interventions", "tenderness's",
+    "loadstones", "castigation's", "undercut", "volubly", "meditated",
+    "Ypsilanti", "Jannie's", "tams", "drummer's", "inaugurations", "mawing",
+    "Anglophile", "Sherpa", "footholds", "Gonzalo", "removers",
+    "customisation", "procurement's", "allured", "grimaced", "captaining",
+    "liberates", "grandeur's", "Windsor", "screwdrivers", "Flynn's",
+    "extortionists", "carnivorous", "thinned", "panhandlers", "trust's",
+    "bemoaned", "untwisted", "cantors", "rectifies", "speculation",
+    "niacin's", "soppy", "condom", "halberd", "Leadbelly", "vocation's",
+    "tanners", "chanticleer", "secretariats", "Ecuador's", "suppurated",
+    "users", "slag's", "atrocity's", "pillar", "sleeveless", "bulldozers",
+    "turners", "hemline", "astounded", "rosaries", "Mallarmé", "crucifies",
+    "Maidenform", "contribution", "evolve", "chemicals", "uteri",
+    "expostulation", "roamers", "daiquiris", "arraignment", "ribs", "King's",
+    "Persepolis", "arsenic's", "blindfolds", "bloodsucker's", "restocks",
+    "falconry", "Olympia's", "Colosseum's", "vigils", "Louie's",
+    "unwillingly", "sealed", "potatoes", "Argentine", "audit's", "outworn",
+    "boggles", "likely", "alleging", "Tinkerbell", "redistribution's",
+    "Normandy", "Cortes", "porter's", "buntings", "cornucopias", "rosewoods",
+    "shelf's", "airdrops", "summits", "Rosalyn", "redecorating", "twirlers",
+    "monsters", "directed", "semiautomatics", "Foch", "Hobart", "mutilates",
+    "Wilma's", "ornamenting", "Clifford's", "pyromania", "Strasbourg",
+    "bleeders", "additions", "super", "effortlessly", "piecing", "vacations",
+    "gybes", "warranted", "Ting", "her", "histrionic", "marshaled", "spore's",
+    "villainy's", "brat", "confusion", "amphitheatre's", "adjourns",
+    "guzzled", "Visayans", "rogue's", "morsels", "candlestick", "flaks",
+    "Waterbury", "pulp's", "endorser's", "postdoc", "coffining", "swallowing",
+    "Wrangell", "Marcie's", "Marley", "untapped", "fear's", "Kant",
+    "pursuit's", "normally", "jackals", "orals", "Paramaribo's", "Marilyn's",
+    "Diem's", "narrower", "medicinally", "chickweed's", "pretentiousness",
+    "Lardner", "baritone's", "purrs", "Pam's", "pestles", "Philip's",
+    "Titania", "eccentrics", "Albion's", "greed's", "raggediest",
+    "importations", "Truman", "incentives", "typified", "incurred",
+    "bandstands", "Minnie's", "pleasant", "Sandy's", "perplexities",
+    "crease's", "obliques", "backstop", "Nair's", "perusing", "Quixote's",
+    "sicknesses", "vapour's", "butte", "lariats", "disfavours", "McGuffey",
+    "paediatric", "filtered", "whiff's", "gunboats", "devolved",
+    "extravaganza's", "organism", "giggling", "citadel's", "counterbalances",
+    "executrixes", "Cathay", "marshmallow's", "iniquitous", "Katmai", "Siva",
+    "welled", "impertinence's", "plunger", "rice", "forgers", "Larousse",
+    "pollution's", "medium", "residue's", "rumbas", "Odis", "arrogant",
+    "Jasper's", "panged", "doubted", "vistaing", "decibel's", "modulus's",
+    "chickpea's", "mugger's", "potentates", "sequesters", "academy's",
+    "Turk's", "pharmacology's", "defogger", "clomp", "soulless", "elastic",
+    "la's", "shards", "unfortunate", "counterclaim's", "objections", "towel",
+    "converged", "z", "ionisation", "stirrups", "antiquarians", "constructor",
+    "virtuosity's", "Göteborg", "centigramme's", "translators", "dalliance's",
+    "us", "bullfight", "drawer's", "nonconformist", "handcrafts", "Magritte",
+    "tulle", "plant's", "routine", "colour's", "latency's", "repertoire's",
+    "photocopies", "catalyse", "ashrams", "lagging", "flapjack's",
+    "ayatollahs", "decentest", "pitted", "conformity", "jack", "batsman",
+    "electrifies", "Unitarians", "obtain", "medicates", "tumour's",
+    "nutritionally", "haystack", "bustles", "slut", "satirising", "birettas",
+    "starring", "Kubrick's", "flogs", "chequering", "Menkalinan's",
+    "Barbados's", "Bioko", "swinish", "hades", "perjured", "timing's",
+    "cocaine", "ejecting", "rationalises", "dilettante's", "umping",
+    "capsized", "frogmen", "matt", "prostituting", "bola's", "devolution's",
+    "poxing", "Maritza's", "snob's", "scoped", "Costco", "feral", "sirocco",
+    "rebating", "truculence", "junkier", "nabs", "elicit", "allegiance",
+    "care", "arteriosclerosis's", "nonproliferation's", "doxologies",
+    "disconsolate", "bodega", "designers", "Rembrandt", "apostasies",
+    "garrulousness", "Hertzsprung's", "hayseeds", "noncooperation's",
+    "resentment", "cuticles", "sandboxes", "gimmicks", "magnolia",
+    "invalidity's", "pulverised", "Tinkerbell's", "hypoglycemics",
+    "gunboat's", "workbench's", "fleetingly's", "sportsman's", "trots",
+    "decomposes", "discrepancies", "owls", "obscener", "organic", "stoutness",
+    "councillor's", "Philippine's", "Aline", "coarsening", "suffocated",
+    "infighting's", "peculiarity", "roof's", "premier", "sucked", "churl",
+    "remounts", "intends", "wiles", "unfold", "unperturbed", "wainscotings",
+    "restfuller", "ashtray's", "wader's", "decanters", "gild", "tandems",
+    "spooked", "galling", "annuity's", "opacity", "clamour's", "flaccid",
+    "caroming", "savvying", "mammalian's", "toadstool's", "doohickey", "jibs",
+    "conquests", "dishes", "effusively", "distinctions", "curly", "Peckinpah",
+    "whining", "quasar", "sponge", "infrequent", "Novembers", "cowling",
+    "poem's", "muzzles", "Sufi", "authoritarians", "prompts", "Gavin's",
+    "morphology's", "shenanigan", "narrated", "rapprochement", "Heine",
+    "propane's", "addition", "prefect's", "pining", "dwindles",
+    "compulsiveness's", "objectors", "trudging", "segregates", "language",
+    "enthralled", "explosiveness", "toeing", "drainers", "Merrimack's",
+    "smarten", "bigwig's", "embroiders", "Medicaids", "grammar's", "behest's",
+    "chiseled", "equalled", "factual", "Casablanca's", "dams",
+    "disillusioned", "turtleneck", "Baden", "provinces", "bushwhacked", "fey",
+    "Yangtze", "loan's", "decent", "strobe", "challenger's", "hometown",
+    "Neal", "Ernestine's", "magnetises", "minute", "patrol", "Starbucks",
+    "Bernstein", "signal", "interplanetary", "tweak", "archdeacon",
+    "untoward", "transducer", "azaleas", "levied", "worlds", "talks",
+    "Tancred", "hairsplitting's", "edibility's", "confab", "rosetted",
+    "Spanish", "Americanisation", "Charley", "realm's", "incongruities",
+    "chinstraps", "dollhouses", "binocular", "popgun", "physiotherapy's",
+    "knave's", "angelically", "heartbreaking", "clarions", "bespeaks",
+    "pivotal", "Zosma", "ungrammatical", "dilution", "tidily", "Dejesus's",
+    "taller", "pennyweight's", "freshman", "Jamestown", "chiefer", "amen",
+    "attiring", "appurtenance's", "opiates", "mottoes", "towellings", "ashen",
+    "font's", "spoors", "pupil", "groom's", "skimpy", "achieves",
+    "intolerance's", "ardour's", "exorcist", "bottoming", "snag's",
+    "Frenches", "hysteric's", "ladyfinger's", "differences", "seed",
+    "clubfoot's", "glades", "Elton's", "jargon", "Waldo", "grinning",
+    "coherence's", "winos", "turnround", "appended", "Ethelred's", "delete",
+    "steadfastness's", "miss", "thermoplastic", "depraves", "unctuous",
+    "reanimates", "transfusing", "protects", "Babbage's", "foists", "inn",
+    "etched", "sanctimoniously", "idling", "timepiece", "holistic",
+    "waterside", "ulna's", "swindled", "employables", "zebra", "nieces",
+    "pertained", "usages", "vamp's", "Larry's", "cooler's", "holographs",
+    "clewing", "stubborning", "peaked", "underfeeds", "marshmallows",
+    "agreeable", "beards", "Slovenia's", "nitroglycerin", "palls", "impurer",
+    "armours", "stomachaches", "notification's", "Dixieland's", "crozier's",
+    "neurotic", "kudos", "Tania's", "M", "soundtrack's", "territory's",
+    "sped", "house's", "divisibility", "ingress's", "pummelled", "Isabel",
+    "Dewitt", "seemly", "hutched", "calliope", "lengthwise", "flubs",
+    "Moldavia's", "Mercia", "McBride's", "Lenten", "pulverise", "football",
+    "oligarchy", "Max", "scribbler", "acclimatize", "brainwashes",
+    "apprenticed", "benevolences", "two", "Wodehouse", "crew's", "massacre",
+    "proportionals", "Jewishness's", "instep's", "emissary", "folder",
+    "nonentity's", "convinced", "caption", "kangarooed", "dogie",
+    "vagabonding", "auction's", "appraising", "antimony", "part's",
+    "longitude's", "inconsiderateness's", "pawning", "serer", "solos",
+    "histories", "mushy", "parturition", "munched", "oregano", "inanest",
+    "dryness", "kitchenware", "unexpected", "covens", "cheesecakes",
+    "stakeout's", "Pulaski's", "Yoknapatawpha's", "pinhead", "drifted",
+    "guzzler's", "funking", "sou'wester", "oesophagus's", "highbrow",
+    "contralto", "meningitis", "Mazzini", "raggedest", "vaginas", "misfiring",
+    "margaritas", "wedder", "pointed", "slicked", "garlanded", "comeuppances",
+    "vassals", "Sui", "Concord", "bozos", "Garry's", "Maribel's", "epileptic",
+    "Jehoshaphat's", "revolutionary's", "kneecaps", "songbird", "actively",
+    "Meredith", "toddler", "distrusting", "fuchsias", "perusal", "instills",
+    "deathbed", "sunspot's", "spatula's", "Muscovy", "humaniser", "Keats",
+    "regrets", "deflect", "theories", "nonpluses", "populating", "leniency's",
+    "penicillin's", "gaol's", "borough", "moose's", "dogmata",
+    "transcendentally", "supposition's", "nursed", "Gagarin's", "honest",
+    "Chandrasekhar's", "mudslinger's", "parable", "bonged", "Wyeth's",
+    "Ochoa's", "Grenoble", "steamy", "halter's", "rotisserie's", "pagoda's",
+    "wallaby's", "Yank", "pretzel", "rapist's", "estrange", "hectored",
+    "Puebla's", "conniver", "creditor's", "dole's", "Fotomat", "patents",
+    "heckling", "thickener", "etches", "yogi", "hemstitched", "obverses",
+    "Lipizzaner", "divert", "Strong's", "sagest", "Alabama", "He", "Carrie's",
+    "obligation's", "verity's", "outed", "Rhee", "bluffed", "codas",
+    "crèche's", "unpalatable", "dilettanti", "vestment", "purse's",
+    "inflammation's", "bookmarked", "doing's", "whinnying", "impersonators",
+    "Theiler", "scurried", "resistor", "southerners", "Anacreon",
+    "reconstruction's", "footage", "trespassing", "Kafka", "bottling",
+    "stays", "Gretzky", "overburdening", "princesses", "weathercock's",
+    "atolls", "cheerier", "packet", "surrenders", "teacup", "Sabik's",
+    "undecidable", "lollygagged", "pawl's", "anaesthesiology", "sublimely",
+    "contortionists", "motorcades", "Maureen", "lamasery", "yourselves",
+    "Creighton", "poliomyelitis's", "civil", "outmanoeuvre", "lauded",
+    "closeness", "Humboldt's", "pretzels", "ungrudging", "blackguard's",
+    "sickles", "typo", "narcotics", "linesman", "psychotics", "pictured",
+    "deviltry", "Yahtzee", "Lovelace's", "cerebra", "airiness's", "bewitch",
+    "how", "motherland's", "crate's", "Keenan's", "turnstile's",
+    "pedometer's", "carted", "slipping", "fallow", "Canadian", "ladybird's",
+    "thump", "shopper's", "enters", "scowls", "nematode", "focused",
+    "Riley's", "grainiest", "novas", "snuffled", "leftovers", "deify",
+    "Samoan", "pruning", "contenting", "Khachaturian's", "triads",
+    "genealogies", "psalmist", "shaming", "appropriated", "ignominies",
+    "Beadle's", "MHz", "peerages", "facile", "Seoul", "Janna's", "jig's",
+    "mousiness's", "funnier", "delimiter", "watermark", "sheik's", "Reasoner",
+    "ipecac's", "curdles", "wronged", "Segovia's", "solders", "Dunne's",
+    "contractor", "awards", "hostels", "pinkie's", "Herzl", "misplace",
+    "shuttle", "innovative", "vestries", "cosmoses", "trikes", "Casandra's",
+    "hokier", "carouser's", "summerhouses", "renascence", "decomposed",
+    "Balzac's", "outlast", "shod", "squalling", "smugging", "weighing",
+    "omega's", "selects", "fleetingly", "Finland", "petted", "disrespects",
+    "fetter", "confound", "brads", "Bosnia's", "preposition's", "guy's",
+    "different", "tracts", "paediatrics's", "polygon", "eyetooth's", "Aesop",
+    "pentagons", "professions", "homeowner", "looter's", "intimidated",
+    "lustre's", "loneliness", "catnapped", "counties", "pailful",
+    "Christendom's", "Barents", "penis", "Mumford's", "Nigel", "éclairs",
+    "splats", "diabolical", "popularly", "quart", "abjected", "Rasalgethi",
+    "camel's", "inimical", "overweening", "distention's", "Advil", "casement",
+    "seamier", "avaricious", "sierra's", "caparison's", "moldered", "Cortez",
+    "handmaid's", "disappointment", "billowed", "overpopulated", "outsets",
+    "ray", "smoother", "overkill", "somber", "tiller's", "zigzag", "adviser",
+    "absorption's", "sturdily", "hairy", "bloodmobile", "investiture's",
+    "creature", "ripeness's", "Jonathon", "arborvitae's", "skulduggery",
+    "bog", "skeleton's", "Kit's", "Panamas", "Ashlee's", "jazzy", "snit",
+    "divisive", "caribous", "permuting", "frankest", "annotated", "oak's",
+    "meg's", "Gill", "burrito", "dormancy's", "offings", "Nike",
+    "outnumbered", "skater's", "Portugal", "deficit", "Cannon's", "pockmark",
+    "sediment's", "mailbox", "innuendoed", "retire", "wolfhound's",
+    "nicotine's", "brigade's", "mettle's", "softhearted", "hooey's",
+    "abdication", "Orval", "Jaime", "ship", "hyphenations", "sectarians",
+    "Alabaman", "tagging", "ultras", "schizoids", "medicines", "undersized",
+    "Gray", "maternity's", "bandaging", "scooping", "coercion's", "serapes",
+    "celebrate", "Listerine's", "throve", "crypt's", "nearsighted",
+    "metallurgists", "Delicious", "cotton's", "yoked", "cogitates",
+    "underage", "cigarette's", "hallways", "Cointreau", "ma'am", "spacing's",
+    "foresight", "parkway's", "Edwardian", "mediator", "Turner", "Derrida's",
+    "motorist's", "hobo", "equivalences", "sophism", "peeping", "telescoped",
+    "overproduce", "ductility", "Leblanc", "refractory", "passé", "decodes",
+    "womanising", "flax's", "pond's", "infrequency", "talkativeness's",
+    "settlement's", "Prince", "bating", "multimillionaire", "Schultz",
+    "premiss", "quackery", "bathhouse", "Leno's", "Monday's", "Hung's",
+    "undaunted", "bewaring", "tension's", "Chile's", "Rostand's", "platoons",
+    "rodeo's", "Dionne", "Dyson's", "gingivitis's", "fewer",
+    "electromagnetism's", "scrubbier", "ensconced", "wretcheder", "mica's",
+    "expectorant", "snapper's", "chastised", "habitation", "spry", "bathing",
+    "stealth's", "champagnes", "baleful", "fencing's", "threaded", "codicils",
+    "disgraced", "redcaps", "addends", "Olivier", "clasped", "Gwendolyn",
+    "foment", "angularity's", "strenuously", "gorilla", "misbehaved",
+    "surplus's", "newsier", "positioned", "bloodmobiles", "circumstantials",
+    "person's", "varicose", "Calliope", "plethora", "Olmsted",
+    "reconciliation", "Brendan's", "beset", "totters", "sailors",
+    "parliamentarians", "Whitaker", "hilts", "pummelling", "academician's",
+    "ruse", "discreeter", "appetisingly", "perfections", "anus", "overrode",
+    "pedantry's", "possessed", "germs", "unscrews", "expired",
+    "semitrailer's", "Cupid's", "nonsmoker", "Marathon", "secs", "Hopkins",
+    "freeing", "libelled", "furious", "staccatos", "electroencephalogram's",
+    "malingerer's", "impulses", "briars", "Tran", "hilltops", "sulks",
+    "quailed", "fads", "retrenches", "spouted", "outtake", "puncture's",
+    "rats", "kibitzed", "berets", "omnivorous", "flange", "Mons", "glints",
+    "mansards", "thou", "cuing", "suspected", "Kaiser's", "savvier", "skits",
+    "interdict's", "Booker", "Rubinstein", "Tm's", "crossing's", "dewlap",
+    "guarantor's", "edification's", "joyfullest", "crossed", "chowdering",
+    "sillier", "reloading", "commodity's", "bodkins", "conduced", "coughs",
+    "nucleus's", "sixtieth", "proverbially", "comprehensive", "ineluctably",
+    "patrolmen", "resuscitating", "carpetbag's", "Darrin's", "Yeager",
+    "Bataan's", "spoonsful", "proceeds", "wrongdoer", "Karroo", "heart",
+    "poison", "typifying", "endowment's", "aquanauts", "deaconesses",
+    "homosexuality", "Maxine", "haunching", "centred", "Peking's",
+    "toothiest", "growers", "firebombs", "throbs", "Downy", "contribution's",
+    "sago's", "Cole", "Knoxville", "leftmost", "Nell's", "Baffin", "barrings",
+    "contagions", "disencumbers", "countdown", "quintuple", "perihelion",
+    "creationism's", "actioning", "admiralty", "Mt's", "durability's",
+    "sewer's", "replicas", "oxide", "ripened", "Pisces's", "Cinerama's",
+    "catheters", "oppressive", "roosting", "foggiest", "properly", "Kareem",
+    "Ollie", "minuted", "vehicles", "eel", "remunerates", "swashbuckler's",
+    "remunerative", "sanguining", "Belem's", "forlornly", "rudders",
+    "officialdom", "countertenors", "Upton", "whoop", "animations", "arouses",
+    "millionths", "videocassette", "fledgling", "shake", "exterminated",
+    "Cain's", "trendiest", "wariest", "torpedoes", "airmails", "Cameron's",
+    "discord's", "spitefulness's", "thudded", "menaced", "takeovers",
+    "solicited", "wallpapers", "economic", "cache", "rechargeable", "gongs",
+    "droning", "exemption", "Alaskans", "toothed", "snifter", "Stephens",
+    "prejudge", "doctor's", "bobolinks", "rotates", "valuation's", "narrator",
+    "weaning", "uncle", "shelter", "destitution's", "Edgardo's", "gauge",
+    "Nice", "Adolf's", "rheumatics", "inheritances", "undesirables",
+    "Eileen's", "flyweight's", "scope", "possessiveness", "tipsily",
+    "effulgence", "rematch", "Baltic", "unsteadiest", "rodeos", "gloaming's",
+    "ringers", "randomised", "commissars", "destroyer's", "router",
+    "disengaging", "it's", "Albert", "rampantly", "varmint", "Adkins",
+    "chevron", "insomniac", "bobsledded", "masochist's", "chronometers",
+    "compaction", "Mauro", "sidled", "Highlander's", "snail's", "syllabifies",
+    "application's", "symmetrical", "blacking", "accent's", "sentimentalists",
+    "sonatas", "profanities", "sloping", "Araby", "percolate", "repeated",
+    "youthfulness's", "Loyola", "deliriously", "matriarch's", "tailors",
+    "rerouting", "hairpin", "dispersal", "endowment", "disquieting", "swat",
+    "neckerchieves", "wrinkles", "amoebas", "Darcy", "orthodontics's",
+    "milder", "sneezing", "prescience's", "pads", "wrought", "perspicuity's",
+    "materialist", "pull", "laundryman's", "lazily", "protractor's", "Vic",
+    "photocopier", "guardrooms", "cablecasting", "confirms", "excretions",
+    "combatant", "counterfeiters", "periwig", "genteelest", "router's",
+    "springy", "procreated", "syphon", "parent's", "bigwigs", "rebelled",
+    "milkmaids", "McGee's", "seaworthier", "Bellatrix's", "tenement",
+    "embryologists", "Vaselining", "burrow's", "tonnage's", "Petty's",
+    "chancels", "scouring", "mouser", "recompensed", "guarding", "editor",
+    "raster", "bourgeoisie's", "interpolating", "skinflint's", "transport",
+    "bullfinch", "needlessly", "withholds", "counterclockwise", "panicking",
+    "Ahriman", "flambeing", "contrary", "heartstrings", "whittled", "crib's",
+    "highlighter", "extroverted", "Martinique's", "racquets", "Maldivian",
+    "physiognomy", "Hammarskjold", "massage", "shingling", "neighbourhood",
+    "boobed", "vulture", "intercontinental", "cobblers", "peddlers",
+    "forthrightly", "germicide", "raindrop's", "fir's", "decaffeinates",
+    "wobblier", "abnegated", "cruiser's", "satiety", "trilled", "impending",
+    "gulf", "mountebank", "beltway", "reappointment", "cinematographer",
+    "pylon", "penthouses", "morally", "installs", "Walsh's", "drawstring",
+    "circus's", "Khayyam's", "Myrtle's", "ventrals", "category's",
+    "opportunistic", "grovelling", "warier", "upchuck", "hairdresser's",
+    "Montanans", "jobber", "dazzle", "encirclement's", "muffin's", "coronets",
+    "focus's", "footfall's", "subjunctives", "late", "pedagogued",
+    "dignitaries", "content", "blockbusters", "reminiscent", "mayor",
+    "specifier", "extinction", "nutshell's", "catbird's", "bundle",
+    "gracefulness", "exceed", "estranges", "chancy", "bankrupted", "Avery",
+    "Barnett", "succulence", "stacking", "ensnare", "truck", "embargo",
+    "persecutes", "translation's", "muskrat's", "illumines", "undercoat's",
+    "fleecier", "brick", "qualities", "imprecision", "reprisals", "discounts",
+    "harmonics", "Mann's", "terrorism", "interminable", "Santiago's",
+    "deepness", "tramples", "golder", "voyeurism's", "tent", "particle's",
+    "minuend", "waxwings", "knobby", "trustee", "funnily", "hotheadedness's",
+    "Kristin", "what", "bite", "murmur's", "pustule's", "weeknights",
+    "rocked", "athlete", "ventilates", "impresses", "daguerreotyping",
+    "Gross", "gambols", "villa", "maraud", "disapproval", "apostrophe's",
+    "sheaf", "noisemaker's", "autonomy's", "massing", "daemon's", "Thackeray",
+    "fermenting", "whammy", "philosophise", "empathy", "calamities",
+    "sunbathe", "Qom", "yahoo's", "coxcomb's", "move", "school's",
+    "rainmakers", "shipwreck", "potbelly's", "courageously", "current",
+    "Aleut", "treaties", "U", "always", "Bosch", "impregnating", "bud's",
+    "carat", "centrists", "acquaintance's", "convoy's", "chichis",
+    "restraint's", "Cosby", "factotums", "handshaking", "paragon's",
+    "mileages", "Tammie", "cartoonists", "lemmas", "lowliness's", "onion's",
+    "E's", "Bible", "Cranmer", "fob's", "minks", "overstocking", "Willamette",
+    "needle's", "scuppers", "Carborundum", "upwardly", "tallies", "aptitude",
+    "synod", "nasturtium's", "Pensacola", "snappish", "merino", "sups",
+    "fingerboard's", "prodigy's", "narcissism's", "substantial", "lug",
+    "establishing", "Vergil's", "patrimonies", "shorted", "forestation",
+    "undeniable", "Katmandu", "lamination", "trollop's", "odd", "stanza",
+    "paraplegic", "melanin", "Rico", "foreman", "stereotypes", "affinity's",
+    "cleansing", "sautéing", "epochs", "crooners", "manicured", "undisclosed",
+    "propel", "usage", "Alioth's", "Aurelia's", "peruse", "Vassar's",
+    "Demosthenes's", "Brazos", "supermarket", "scribbles", "Jekyll's",
+    "discomfort's", "mastiffs", "ballasting", "Figueroa", "turnstiles",
+    "convince", "Shelton's", "Gustavo", "shunting", "Fujitsu's", "fining's",
+    "hippos", "dam's", "expressionists", "peewee", "troop's"
+  ]
+  WORDS_SIZE = WORDS.size
+  # Picks one entry of WORDS at a uniformly random index.
+  def random_word
+    index = rand(WORDS_SIZE)
+    WORDS[index]
+  end
+  # Builds a string of between 1 and max_len random words. Every word,
+  # including the first, is preceded by a single space.
+  def random_sentence(max_len)
+    word_count = 1 + rand(max_len)
+    words = []
+    word_count.times { words << random_word }
+    words.map { |word| " " + word }.join
+  end
+  # Generates a random document as a Hash keyed by Symbol.
+  #
+  # Between 1 and max_fields keys are drawn with random_word (a repeated
+  # draw overwrites the earlier entry). Each key maps to a single random
+  # sentence when exactly one element is drawn, otherwise to an Array of
+  # between 2 and max_elements sentences. Each sentence holds up to
+  # max_len words.
+  def random_doc(max_fields = 10, max_elements = 10, max_len = 100)
+    doc = {}
+    field_count = 1 + rand(max_fields)
+    field_count.times do
+      key = random_word.intern
+      values = []
+      (rand(max_elements) + 1).times { values << random_sentence(max_len) }
+      # A lone value is stored bare rather than wrapped in an Array.
+      doc[key] = values.size == 1 ? values.first : values
+    end
+    doc
+  end
 end

data/test/unit/index/th_doc.rb CHANGED Viewed

@@ -1,8 +1,4 @@
-require File.dirname(__FILE__) + "/../../test_helper"
 module IndexTestHelper
-  include Ferret::Document
   include Ferret::Index
   include Ferret::Analysis
   include Ferret::Search
@@ -16,23 +12,34 @@ module IndexTestHelper
   BINARY_DATA = IndexTestHelper.make_binary(256)
   COMPRESSED_BINARY_DATA = IndexTestHelper.make_binary(56)
-  def IndexTestHelper.prepare_document
-    doc = Document.new()
-    doc << Field.new("text_field1", "field one text", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO)
-    doc << Field.new("text_field2", "field field field two text", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    doc << Field.new("key_field", "keyword", Field::Store::YES, Field::Index::UNTOKENIZED)
-    doc << Field.new("unindexed_field", "unindexed field text", Field::Store::YES, Field::Index::NO)
-    doc << Field.new("unstored_field1", "unstored field text one", Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::NO)
-    doc << Field.new("unstored_field2", "unstored field text two", Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::YES)
-    doc << Field.new("compressed_field", "compressed text", Field::Store::COMPRESS, Field::Index::TOKENIZED, Field::TermVector::YES)
-    doc << Field.new_binary_field("binary_field", BINARY_DATA, Field::Store::YES)
-    doc << Field.new_binary_field("compressed_binary_field", COMPRESSED_BINARY_DATA, Field::Store::COMPRESS)
-    return doc
+  # Builds one sample document together with the field configuration for it.
+  #
+  # Nine fields are registered on a fresh FieldInfos, each with a different
+  # mix of :store / :index / :term_vector options; options that are not
+  # passed are left to whatever FieldInfos uses by default. The document is
+  # a plain Hash with one value per registered field.
+  #
+  # dir:: accepted but not referenced anywhere in this method body.
+  #
+  # Returns a two-element Array: [doc, fis].
+  def IndexTestHelper.prepare_document(dir)
+    fis = FieldInfos.new
+    fis.add_field(:text_field1, :term_vector => :no)
+    fis.add_field(:text_field2)
+    fis.add_field(:key_field, :index => :untokenized)
+    fis.add_field(:unindexed_field, :index => :no)
+    fis.add_field(:unstored_field1, :store => :no, :term_vector => :no)
+    fis.add_field(:unstored_field2, :store => :no, :term_vector => :yes)
+    fis.add_field(:compressed_field, :store => :compressed, :term_vector => :yes)
+    fis.add_field(:binary_field, :index => :no, :term_vector => :no)
+    fis.add_field(:compressed_binary_field, :store => :compressed,
+                  :index => :no, :term_vector => :no)
+    # Keys mirror the field names registered above, in the same order.
+    doc = {
+      :text_field1             => "field one text",
+      :text_field2             => "field field field two text",
+      :key_field               => "keyword",
+      :unindexed_field         => "unindexed field text",
+      :unstored_field1         => "unstored field text one",
+      :unstored_field2         => "unstored field text two",
+      :compressed_field        => "compressed text",
+      :binary_field            => BINARY_DATA,
+      :compressed_binary_field => COMPRESSED_BINARY_DATA
+    }
+    return doc, fis
   end
   def IndexTestHelper.prepare_documents
-    data = [
+    [
       ["apple", "green"],
       ["apple", "red"],
       ["orange", "orange"],
@@ -41,212 +48,268 @@ module IndexTestHelper
       ["mandarin", "orange"],
       ["peach", "orange"],
       ["apricot", "orange"]
-    ]
-    docs = []
-    data.each do |food|
-      doc = Document.new()
-      doc << Field.new("name", food[0], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-      doc << Field.new("colour", food[1], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-      docs << doc
-    end
-    return docs
-  end
-  def IndexTestHelper.write_document(dir, doc, segment="test", analyzer = WhiteSpaceAnalyzer.new(), similarity = Similarity.default())
-    writer = DocumentWriter.new(dir, analyzer, similarity, 50)
-    writer.add_document(segment, doc)
+    ].map { |food| {"name" => food[0], "colour" => food[1]} }
   end
   def IndexTestHelper.prepare_book_list
     books = [
-      {"author" => "P.H. Newby", "title" => "Something To Answer For", "year" => "1969"},
-      {"author" => "Bernice Rubens", "title" => "The Elected Member", "year" => "1970"},
-      {"author" => "V. S. Naipaul", "title" => "In a Free State", "year" => "1971"},
-      {"author" => "John Berger", "title" => "G", "year" => "1972"},
-      {"author" => "J. G. Farrell", "title" => "The Siege of Krishnapur", "year" => "1973"},
-      {"author" => "Stanley Middleton", "title" => "Holiday", "year" => "1974"},
-      {"author" => "Nadine Gordimer", "title" => "The Conservationist", "year" => "1974"},
-      {"author" => "Ruth Prawer Jhabvala", "title" => "Heat and Dust", "year" => "1975"},
-      {"author" => "David Storey", "title" => "Saville", "year" => "1976"},
-      {"author" => "Paul Scott", "title" => "Staying On", "year" => "1977"},
-      {"author" => "Iris Murdoch", "title" => "The Sea", "year" => "1978"},
-      {"author" => "Penelope Fitzgerald", "title" => "Offshore", "year" => "1979"},
-      {"author" => "William Golding", "title" => "Rites of Passage", "year" => "1980"},
-      {"author" => "Salman Rushdie", "title" => "Midnight's Children", "year" => "1981"},
-      {"author" => "Thomas Keneally", "title" => "Schindler's Ark", "year" => "1982"},
-      {"author" => "J. M. Coetzee", "title" => "Life and Times of Michael K", "year" => "1983"},
-      {"author" => "Anita Brookner", "title" => "Hotel du Lac", "year" => "1984"},
-      {"author" => "Keri Hulme", "title" => "The Bone People", "year" => "1985"},
-      {"author" => "Kingsley Amis", "title" => "The Old Devils", "year" => "1986"},
-      {"author" => "Penelope Lively", "title" => "Moon Tiger", "year" => "1987"},
-      {"author" => "Peter Carey", "title" => "Oscar and Lucinda", "year" => "1988"},
-      {"author" => "Kazuo Ishiguro", "title" => "The Remains of the Day", "year" => "1989"},
-      {"author" => "A. S. Byatt", "title" => "Possession", "year" => "1990"},
-      {"author" => "Ben Okri", "title" => "The Famished Road", "year" => "1991"},
-      {"author" => "Michael Ondaatje", "title" => "The English Patient", "year" => "1992"},
-      {"author" => "Barry Unsworth", "title" => "Sacred Hunger", "year" => "1992"},
-      {"author" => "Roddy Doyle", "title" => "Paddy Clarke Ha Ha Ha", "year" => "1993"},
-      {"author" => "James Kelman", "title" => "How Late It Was, How Late", "year" => "1994"},
-      {"author" => "Pat Barker", "title" => "The Ghost Road", "year" => "1995"},
-      {"author" => "Graham Swift", "title" => "Last Orders", "year" => "1996"},
-      {"author" => "Arundati Roy", "title" => "The God of Small Things", "year" => "1997"},
-      {"author" => "Ian McEwan", "title" => "Amsterdam", "year" => "1998"},
-      {"author" => "J. M. Coetzee", "title" => "Disgrace", "year" => "1999"},
-      {"author" => "Margaret Atwood", "title" => "The Blind Assassin", "year" => "2000"},
-      {"author" => "Peter Carey", "title" => "True History of the Kelly Gang", "year" => "2001"},
-      {"author" => "Yann Martel", "title" => "The Life of Pi", "year" => "2002"},
-      {"author" => "DBC Pierre", "title" => "Vernon God Little", "year" => "2003"}
+      {"author" => "P.H. Newby",
+        "title" => "Something To Answer For",
+         "year" => "1969"},
+      {"author" => "Bernice Rubens",
+        "title" => "The Elected Member",
+         "year" => "1970"},
+      {"author" => "V. S. Naipaul",
+        "title" => "In a Free State",
+         "year" => "1971"},
+      {"author" => "John Berger",
+        "title" => "G",
+         "year" => "1972"},
+      {"author" => "J. G. Farrell",
+        "title" => "The Siege of Krishnapur",
+         "year" => "1973"},
+      {"author" => "Stanley Middleton",
+        "title" => "Holiday",
+         "year" => "1974"},
+      {"author" => "Nadine Gordimer",
+        "title" => "The Conservationist",
+         "year" => "1974"},
+      {"author" => "Ruth Prawer Jhabvala",
+        "title" => "Heat and Dust",
+         "year" => "1975"},
+      {"author" => "David Storey",
+        "title" => "Saville",
+         "year" => "1976"},
+      {"author" => "Paul Scott",
+        "title" => "Staying On",
+         "year" => "1977"},
+      {"author" => "Iris Murdoch",
+        "title" => "The Sea",
+         "year" => "1978"},
+      {"author" => "Penelope Fitzgerald",
+        "title" => "Offshore",
+         "year" => "1979"},
+      {"author" => "William Golding",
+        "title" => "Rites of Passage",
+         "year" => "1980"},
+      {"author" => "Salman Rushdie",
+        "title" => "Midnight's Children",
+         "year" => "1981"},
+      {"author" => "Thomas Keneally",
+        "title" => "Schindler's Ark",
+         "year" => "1982"},
+      {"author" => "J. M. Coetzee",
+        "title" => "Life and Times of Michael K",
+         "year" => "1983"},
+      {"author" => "Anita Brookner",
+        "title" => "Hotel du Lac",
+         "year" => "1984"},
+      {"author" => "Keri Hulme",
+        "title" => "The Bone People",
+         "year" => "1985"},
+      {"author" => "Kingsley Amis",
+        "title" => "The Old Devils",
+         "year" => "1986"},
+      {"author" => "Penelope Lively",
+        "title" => "Moon Tiger",
+         "year" => "1987"},
+      {"author" => "Peter Carey",
+        "title" => "Oscar and Lucinda",
+         "year" => "1988"},
+      {"author" => "Kazuo Ishiguro",
+        "title" => "The Remains of the Day",
+         "year" => "1989"},
+      {"author" => "A. S. Byatt",
+        "title" => "Possession",
+         "year" => "1990"},
+      {"author" => "Ben Okri",
+        "title" => "The Famished Road",
+         "year" => "1991"},
+      {"author" => "Michael Ondaatje",
+        "title" => "The English Patient",
+         "year" => "1992"},
+      {"author" => "Barry Unsworth",
+        "title" => "Sacred Hunger",
+         "year" => "1992"},
+      {"author" => "Roddy Doyle",
+        "title" => "Paddy Clarke Ha Ha Ha",
+         "year" => "1993"},
+      {"author" => "James Kelman",
+        "title" => "How Late It Was, How Late",
+         "year" => "1994"},
+      {"author" => "Pat Barker",
+        "title" => "The Ghost Road",
+         "year" => "1995"},
+      {"author" => "Graham Swift",
+        "title" => "Last Orders",
+         "year" => "1996"},
+      {"author" => "Arundati Roy",
+        "title" => "The God of Small Things",
+         "year" => "1997"},
+      {"author" => "Ian McEwan",
+        "title" => "Amsterdam",
+         "year" => "1998"},
+      {"author" => "J. M. Coetzee",
+        "title" => "Disgrace",
+         "year" => "1999"},
+      {"author" => "Margaret Atwood",
+        "title" => "The Blind Assassin",
+         "year" => "2000"},
+      {"author" => "Peter Carey",
+        "title" => "True History of the Kelly Gang",
+         "year" => "2001"},
+      {"author" => "Yann Martel",
+        "title" => "The Life of Pi",
+         "year" => "2002"},
+      {"author" => "DBC Pierre",
+        "title" => "Vernon God Little",
+         "year" => "2003"}
     ]
-    docs = []
-    books.each do |book|
-      doc = Document.new()
-      doc << Field.new("author", book["author"], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-      doc << Field.new("title", book["title"], Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-      doc << Field.new("year", book["year"], Field::Store::YES, Field::Index::NO, Field::TermVector::NO)
-      docs << doc
-    end
-    return docs
   end
-  IR_TEST_DOC_CNT = 64
-  def IndexTestHelper.prepare_ir_test_docs()
-    body = "body"
-    title = "title"
-    author = "author"
-    text = "text"
-    year = "year"
-    changing_field = "changing_field"
+  # Creates a FieldInfos and registers six fields on it: :body,
+  # :changing_field, :title, :author, :year and :text, each with its own
+  # :store / :index / :term_vector options.
+  #
+  # There is no explicit return, so the method's value is whatever the
+  # final add_field call returns.
+  # NOTE(review): INDEX_TEST_FIS is assigned from this method, so this
+  # presumably relies on add_field returning the FieldInfos itself --
+  # confirm, or end the method with a bare `fis`.
+  def self.prepare_ir_test_fis
+    fis = FieldInfos.new
+    fis.add_field(:body)
+    fis.add_field(:changing_field, :term_vector => :no)
+    fis.add_field(:title, :index => :untokenized, :term_vector => :with_offsets)
+    fis.add_field(:author, :term_vector => :with_positions)
+    fis.add_field(:year, :index => :no, :term_vector => :no)
+    fis.add_field(:text, :store => :no, :term_vector => :no)
+  end
-    docs = Array.new(IR_TEST_DOC_CNT)
-    docs[0] = Document.new()
-    docs[0] << Field.new(body, "Where is Wally", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[0] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO)
-    docs[1] = Document.new()
-    docs[1] << Field.new(body, "Some Random Sentence read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[2] = Document.new()
-    docs[2] << Field.new(body, "Some read Random Sentence read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[3] = Document.new()
-    docs[3] << Field.new(title, "War And Peace", Field::Store::YES, Field::Index::UNTOKENIZED, Field::TermVector::WITH_OFFSETS)
-    docs[3] << Field.new(body, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[3] << Field.new(author, "Leo Tolstoy", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS)
-    docs[3] << Field.new(year, "1865", Field::Store::YES, Field::Index::NO, Field::TermVector::NO)
-    docs[3] << Field.new(text, "more text which is not stored", Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::NO)
-    docs[4] = Document.new()
-    docs[4] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[5] = Document.new()
-    docs[5] << Field.new(body, "Here's Wally", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[6] = Document.new()
-    docs[6] << Field.new(body, "Some Random Sentence read read read read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[7] = Document.new()
-    docs[7] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[8] = Document.new()
-    docs[8] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[9] = Document.new()
-    docs[9] << Field.new(body, "read Some Random Sentence read this will be used after unfinished next position read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[10] = Document.new()
-    docs[10] << Field.new(body, "Some read Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[10] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::YES)
-    docs[11] = Document.new()
-    docs[11] << Field.new(body, "And here too. Well, maybe Not", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[12] = Document.new()
-    docs[12] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[13] = Document.new()
-    docs[13] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[14] = Document.new()
-    docs[14] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[15] = Document.new()
-    docs[15] << Field.new(body, "Some read Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[16] = Document.new()
-    docs[16] << Field.new(body, "Some Random read read Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[17] = Document.new()
-    docs[17] << Field.new(body, "Some Random read Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[17] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS)
-    docs[18] = Document.new()
-    docs[18] << Field.new(body, "Wally Wally Wally", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[19] = Document.new()
-    docs[19] << Field.new(body, "Some Random Sentence", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[19] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_OFFSETS)
-    docs[20] = Document.new()
-    docs[20] << Field.new(body, "Wally is where Wally usually likes to go. Wally Mart! Wally likes shopping there for Where's Wally books. Wally likes to read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[20] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[21] = Document.new()
-    docs[21] << Field.new(body, "Some Random Sentence read read read and more read read read", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
-    docs[21] << Field.new(changing_field, "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3", Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO)
+  INDEX_TEST_DOC_COUNT = 64
+  def self.prepare_ir_test_docs
+    docs = []
+    docs[0] = {
+      :body => "Where is Wally",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3",
+    }
+    docs[1] = {
+      :body => "Some Random Sentence read"
+    }
+    docs[2] = {
+      :body => "Some read Random Sentence read"
+    }
+    docs[3] = {
+      :title  => "War And Peace",
+      :body   => "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3",
+      :author => "Leo Tolstoy",
+      :year   => "1865",
+      :text   => "more text which is not stored"
+    }
+    docs[4] = {
+      :body => "Some Random Sentence"
+    }
+    docs[5] = {
+      :body => "Here's Wally"
+    }
+    docs[6] = {
+      :body => "Some Random Sentence read read read read"
+    }
+    docs[7] = {
+      :body => "Some Random Sentence"
+    }
+    docs[8] = {
+      :body => "Some Random Sentence"
+    }
+    docs[9] = {
+      :body => "read Some Random Sentence read this will be used after " +
+               "unfinished next position read"
+    }
+    docs[10] = {
+      :body => "Some read Random Sentence",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3"
+    }
+    docs[11] = {
+      :body => "And here too. Well, maybe Not"
+    }
+    docs[12] = {
+      :body => "Some Random Sentence"
+    }
+    docs[13] = {
+      :body => "Some Random Sentence"
+    }
+    docs[14] = {
+      :body => "Some Random Sentence"
+    }
+    docs[15] = {
+      :body => "Some Random Sentence"
+    }
+    docs[16] = {
+      :body => "Some Random read read Sentence"
+    }
+    docs[17] = {
+      :body => "Some Random read Sentence",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3"
+    }
+    docs[18] = {
+      :body => "Wally Wally Wally"
+    }
+    docs[19] = {
+      :body => "Some Random Sentence",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3"
+    }
+    docs[20] = {
+      :body => "Wally is where Wally usually likes to go. Wally Mart! Wally " +
+               "likes shopping there for Where's Wally books. Wally likes " +
+               "to read",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3"
+    }
+    docs[21] = {
+      :body => "Some Random Sentence read read read and more read read read",
+      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
+                         "word3 word3"
+    }
     buf = ""
     21.times { buf << "skip " }
-    22.upto(IR_TEST_DOC_CNT) do |i|
+    22.upto(INDEX_TEST_DOC_COUNT-1) do |i|
       buf << "skip "
-      docs[i] = Document.new()
-      docs[i] << Field.new(text, buf.clone, Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
+      docs[i] = {:text => buf.clone}
     end
     return docs
   end
-  def IndexTestHelper.prepare_search_docs
-    data = [
-      {"date" => "20050930", "field" => "word1",
-        "cat" => "cat1/"},
-      {"date" => "20051001", "field" => "word1 word2 the quick brown fox",
-        "cat" => "cat1/sub1"},
-      {"date" => "20051002", "field" => "word1 word3",
-        "cat" => "cat1/sub1/subsub1"},
-      {"date" => "20051003", "field" => "word1 word3",
-        "cat" => "cat1/sub2"},
-      {"date" => "20051004", "field" => "word1 word2",
-        "cat" => "cat1/sub2/subsub2"},
-      {"date" => "20051005", "field" => "word1",
-        "cat" => "cat2/sub1"},
-      {"date" => "20051006", "field" => "word1 word3",
-        "cat" => "cat2/sub1"},
-      {"date" => "20051007", "field" => "word1",
-        "cat" => "cat2/sub1"},
-      {"date" => "20051008", "field" => "word1 word2 word3 the fast brown fox",
-        "cat" => "cat2/sub1"},
-      {"date" => "20051009", "field" => "word1",
-        "cat" => "cat3/sub1"},
-      {"date" => "20051010", "field" => "word1",
-        "cat" => "cat3/sub1"},
-      {"date" => "20051011", "field" => "word1 word3 the quick red fox",
-        "cat" => "cat3/sub1"},
-      {"date" => "20051012", "field" => "word1",
-        "cat" => "cat3/sub1"},
-      {"date" => "20051013", "field" => "word1",
-        "cat" => "cat1/sub2"},
-      {"date" => "20051014", "field" => "word1 word3 the quick hairy fox",
-        "cat" => "cat1/sub1"},
-      {"date" => "20051015", "field" => "word1",
-        "cat" => "cat1/sub2/subsub1"},
-      {"date" => "20051016",
-        "field" => "word1 the quick fox is brown and hairy and a little red",
-        "cat" => "cat1/sub1/subsub2"},
-      {"date" => "20051017", "field" => "word1 the brown fox is quick and red",
-        "cat" => "cat1/"}
-    ]
-    docs = []
-    data.each_with_index do |fields, i|
-      doc = Document.new()
-      doc.boost = i+1
+  INDEX_TEST_DOCS = self.prepare_ir_test_docs()
+  INDEX_TEST_FIS = self.prepare_ir_test_fis()
-      fields.each_pair do |field, text|
-        doc << Field.new(field, text, Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO, false)
-      end
-      docs << doc
+  def self.prepare_search_docs
+    i = 1
+    [
+      ["20050930", "cat1/",             "word1"                               ],
+      ["20051001", "cat1/sub1",         "word1 word2 the quick brown fox"     ],
+      ["20051002", "cat1/sub1/subsub1", "word1 word3"                         ],
+      ["20051003", "cat1/sub2",         "word1 word3"                         ],
+      ["20051004", "cat1/sub2/subsub2", "word1 word2"                         ],
+      ["20051005", "cat2/sub1",         "word1"                               ],
+      ["20051006", "cat2/sub1",         "word1 word3"                         ],
+      ["20051007", "cat2/sub1",         "word1"                               ],
+      ["20051008", "cat2/sub1",         "word1 word2 word3 the fast brown fox"],
+      ["20051009", "cat3/sub1",         "word1"                               ],
+      ["20051010", "cat3/sub1",         "word1"                               ],
+      ["20051011", "cat3/sub1",         "word1 word3 the quick red fox"       ],
+      ["20051012", "cat3/sub1",         "word1"                               ],
+      ["20051013", "cat1/sub2",         "word1"                               ],
+      ["20051014", "cat1/sub1",         "word1 word3 the quick hairy fox"     ],
+      ["20051015", "cat1/sub2/subsub1", "word1"                               ],
+      ["20051016", "cat1/sub1/subsub2",
+        "word1 the quick fox is brown and hairy and a little red"             ],
+      ["20051017", "cat1/",
+        "word1 the brown fox is quick and red"                                ]
+    ].map do |date, category, field|
+      doc = Ferret::Document.new(i)
+      i += 1
+      doc[:date] = date
+      doc[:category] = category
+      doc[:field] = field
+      doc
     end
-    return docs
-  end
-  def IndexTestHelper.explain (query, searcher, field)
-    top_docs = searcher.search(query)
-    top_docs.score_docs.each { |sd|
-      puts "\nDoc #{sd.doc}: #{searcher.doc(sd.doc)[field]}\n#{searcher.explain(query, sd.doc).to_s}\n"
-    }
   end
+  SEARCH_TEST_DOCS = self.prepare_search_docs()
 end