sdsykes-ferret 0.11.6.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (195) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1555 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/inc/lang.h +48 -0
  37. data/ext/inc/threading.h +31 -0
  38. data/ext/index.c +6425 -0
  39. data/ext/index.h +961 -0
  40. data/ext/lang.h +66 -0
  41. data/ext/libstemmer.c +92 -0
  42. data/ext/libstemmer.h +79 -0
  43. data/ext/mempool.c +87 -0
  44. data/ext/mempool.h +35 -0
  45. data/ext/modules.h +162 -0
  46. data/ext/multimapper.c +310 -0
  47. data/ext/multimapper.h +51 -0
  48. data/ext/posh.c +1006 -0
  49. data/ext/posh.h +1007 -0
  50. data/ext/priorityqueue.c +151 -0
  51. data/ext/priorityqueue.h +143 -0
  52. data/ext/q_boolean.c +1608 -0
  53. data/ext/q_const_score.c +161 -0
  54. data/ext/q_filtered_query.c +209 -0
  55. data/ext/q_fuzzy.c +268 -0
  56. data/ext/q_match_all.c +148 -0
  57. data/ext/q_multi_term.c +677 -0
  58. data/ext/q_parser.c +2825 -0
  59. data/ext/q_phrase.c +1126 -0
  60. data/ext/q_prefix.c +100 -0
  61. data/ext/q_range.c +350 -0
  62. data/ext/q_span.c +2402 -0
  63. data/ext/q_term.c +337 -0
  64. data/ext/q_wildcard.c +171 -0
  65. data/ext/r_analysis.c +2575 -0
  66. data/ext/r_index.c +3472 -0
  67. data/ext/r_qparser.c +585 -0
  68. data/ext/r_search.c +4105 -0
  69. data/ext/r_store.c +513 -0
  70. data/ext/r_utils.c +963 -0
  71. data/ext/ram_store.c +471 -0
  72. data/ext/search.c +1741 -0
  73. data/ext/search.h +885 -0
  74. data/ext/similarity.c +150 -0
  75. data/ext/similarity.h +82 -0
  76. data/ext/sort.c +983 -0
  77. data/ext/stem_ISO_8859_1_danish.c +338 -0
  78. data/ext/stem_ISO_8859_1_danish.h +16 -0
  79. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  80. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  81. data/ext/stem_ISO_8859_1_english.c +1156 -0
  82. data/ext/stem_ISO_8859_1_english.h +16 -0
  83. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  84. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  85. data/ext/stem_ISO_8859_1_french.c +1276 -0
  86. data/ext/stem_ISO_8859_1_french.h +16 -0
  87. data/ext/stem_ISO_8859_1_german.c +512 -0
  88. data/ext/stem_ISO_8859_1_german.h +16 -0
  89. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  90. data/ext/stem_ISO_8859_1_italian.h +16 -0
  91. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  92. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  93. data/ext/stem_ISO_8859_1_porter.c +776 -0
  94. data/ext/stem_ISO_8859_1_porter.h +16 -0
  95. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  96. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  97. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  98. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  99. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  100. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  101. data/ext/stem_KOI8_R_russian.c +701 -0
  102. data/ext/stem_KOI8_R_russian.h +16 -0
  103. data/ext/stem_UTF_8_danish.c +344 -0
  104. data/ext/stem_UTF_8_danish.h +16 -0
  105. data/ext/stem_UTF_8_dutch.c +653 -0
  106. data/ext/stem_UTF_8_dutch.h +16 -0
  107. data/ext/stem_UTF_8_english.c +1176 -0
  108. data/ext/stem_UTF_8_english.h +16 -0
  109. data/ext/stem_UTF_8_finnish.c +808 -0
  110. data/ext/stem_UTF_8_finnish.h +16 -0
  111. data/ext/stem_UTF_8_french.c +1296 -0
  112. data/ext/stem_UTF_8_french.h +16 -0
  113. data/ext/stem_UTF_8_german.c +526 -0
  114. data/ext/stem_UTF_8_german.h +16 -0
  115. data/ext/stem_UTF_8_italian.c +1113 -0
  116. data/ext/stem_UTF_8_italian.h +16 -0
  117. data/ext/stem_UTF_8_norwegian.c +302 -0
  118. data/ext/stem_UTF_8_norwegian.h +16 -0
  119. data/ext/stem_UTF_8_porter.c +794 -0
  120. data/ext/stem_UTF_8_porter.h +16 -0
  121. data/ext/stem_UTF_8_portuguese.c +1055 -0
  122. data/ext/stem_UTF_8_portuguese.h +16 -0
  123. data/ext/stem_UTF_8_russian.c +709 -0
  124. data/ext/stem_UTF_8_russian.h +16 -0
  125. data/ext/stem_UTF_8_spanish.c +1137 -0
  126. data/ext/stem_UTF_8_spanish.h +16 -0
  127. data/ext/stem_UTF_8_swedish.c +313 -0
  128. data/ext/stem_UTF_8_swedish.h +16 -0
  129. data/ext/stopwords.c +401 -0
  130. data/ext/store.c +692 -0
  131. data/ext/store.h +777 -0
  132. data/ext/term_vectors.c +352 -0
  133. data/ext/threading.h +31 -0
  134. data/ext/utilities.c +446 -0
  135. data/ext/win32.h +54 -0
  136. data/lib/ferret.rb +29 -0
  137. data/lib/ferret/browser.rb +246 -0
  138. data/lib/ferret/browser/s/global.js +192 -0
  139. data/lib/ferret/browser/s/style.css +148 -0
  140. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  141. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  142. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  143. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  144. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  145. data/lib/ferret/browser/views/layout.rhtml +22 -0
  146. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  147. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  148. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  149. data/lib/ferret/browser/webrick.rb +14 -0
  150. data/lib/ferret/document.rb +130 -0
  151. data/lib/ferret/field_infos.rb +44 -0
  152. data/lib/ferret/index.rb +786 -0
  153. data/lib/ferret/number_tools.rb +157 -0
  154. data/lib/ferret_version.rb +3 -0
  155. data/setup.rb +1555 -0
  156. data/test/test_all.rb +5 -0
  157. data/test/test_helper.rb +24 -0
  158. data/test/threading/number_to_spoken.rb +132 -0
  159. data/test/threading/thread_safety_index_test.rb +79 -0
  160. data/test/threading/thread_safety_read_write_test.rb +76 -0
  161. data/test/threading/thread_safety_test.rb +133 -0
  162. data/test/unit/analysis/tc_analyzer.rb +548 -0
  163. data/test/unit/analysis/tc_token_stream.rb +646 -0
  164. data/test/unit/index/tc_index.rb +762 -0
  165. data/test/unit/index/tc_index_reader.rb +699 -0
  166. data/test/unit/index/tc_index_writer.rb +437 -0
  167. data/test/unit/index/th_doc.rb +315 -0
  168. data/test/unit/largefile/tc_largefile.rb +46 -0
  169. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  170. data/test/unit/search/tc_filter.rb +135 -0
  171. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  172. data/test/unit/search/tc_index_searcher.rb +61 -0
  173. data/test/unit/search/tc_multi_searcher.rb +128 -0
  174. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  175. data/test/unit/search/tc_search_and_sort.rb +179 -0
  176. data/test/unit/search/tc_sort.rb +49 -0
  177. data/test/unit/search/tc_sort_field.rb +27 -0
  178. data/test/unit/search/tc_spans.rb +190 -0
  179. data/test/unit/search/tm_searcher.rb +384 -0
  180. data/test/unit/store/tc_fs_store.rb +77 -0
  181. data/test/unit/store/tc_ram_store.rb +35 -0
  182. data/test/unit/store/tm_store.rb +34 -0
  183. data/test/unit/store/tm_store_lock.rb +68 -0
  184. data/test/unit/tc_document.rb +81 -0
  185. data/test/unit/ts_analysis.rb +2 -0
  186. data/test/unit/ts_index.rb +2 -0
  187. data/test/unit/ts_largefile.rb +4 -0
  188. data/test/unit/ts_query_parser.rb +2 -0
  189. data/test/unit/ts_search.rb +2 -0
  190. data/test/unit/ts_store.rb +2 -0
  191. data/test/unit/ts_utils.rb +2 -0
  192. data/test/unit/utils/tc_bit_vector.rb +295 -0
  193. data/test/unit/utils/tc_number_tools.rb +117 -0
  194. data/test/unit/utils/tc_priority_queue.rb +106 -0
  195. metadata +285 -0
@@ -0,0 +1,437 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
+ class IndexWriterTest < Test::Unit::TestCase
5
+ include Ferret::Index
6
+ include Ferret::Analysis
7
+
8
+ def setup()
9
+ @dir = Ferret::Store::RAMDirectory.new
10
+ fis = FieldInfos.new()
11
+ fis.create_index(@dir)
12
+ end
13
+
14
+ def teardown()
15
+ @dir.close()
16
+ end
17
+
18
+ def test_initialize
19
+ wlock = @dir.make_lock(IndexWriter::WRITE_LOCK_NAME)
20
+ clock = @dir.make_lock(IndexWriter::COMMIT_LOCK_NAME)
21
+ assert(! wlock.locked?)
22
+ assert(! clock.locked?)
23
+ iw = IndexWriter.new(:dir => @dir, :create => true)
24
+ assert(@dir.exists?("segments"))
25
+ assert(wlock.locked?)
26
+ iw.close()
27
+ assert(@dir.exists?("segments"))
28
+ assert(! wlock.locked?)
29
+ assert(! clock.locked?)
30
+ end
31
+
32
+ def test_add_document
33
+ iw = IndexWriter.new(:dir => @dir,
34
+ :analyzer => StandardAnalyzer.new(),
35
+ :create => true)
36
+ iw << {:title => "first doc", :content => ["contents of", "first doc"]}
37
+ assert_equal(1, iw.doc_count)
38
+ iw << ["contents of", "second doc"]
39
+ assert_equal(2, iw.doc_count)
40
+ iw << "contents of third doc"
41
+ assert_equal(3, iw.doc_count)
42
+ iw.close()
43
+ end
44
+
45
+ def test_add_documents_fuzzy
46
+ iw = IndexWriter.new(:dir => @dir,
47
+ :analyzer => StandardAnalyzer.new())
48
+ iw.merge_factor = 3
49
+ iw.max_buffered_docs = 3
50
+
51
+ # add 100 documents
52
+ 100.times do
53
+ doc = random_doc()
54
+ iw.add_document(doc)
55
+ end
56
+ assert_equal(100, iw.doc_count)
57
+ iw.close()
58
+ end
59
+
60
+ private
61
+
62
+ WORDS = [
63
+ "desirous", "hollowness's", "camp's", "Senegal", "broadcaster's",
64
+ "pecking", "Provence", "paternalism", "premonition", "Dumbo's",
65
+ "Darlene's", "Elbert's", "substrate", "Camille", "Menkalinan", "Cooper",
66
+ "decamps", "abatement's", "bindings", "scrubby", "subset", "ancestor's",
67
+ "pelagic", "abscissa", "loofah's", "gleans", "boudoir", "disappointingly",
68
+ "guardianship's", "settlers", "Mylar", "timetable's", "parabolic",
69
+ "madams", "bootlegger's", "monotonically", "gage", "Karyn's", "deposed",
70
+ "boozy", "swordfish's", "Chevron", "Victrola", "Tameka", "impels",
71
+ "carrels", "salami's", "celibate", "resistance's", "duration",
72
+ "abscissae", "Kilroy's", "corrosive", "flight's", "flapper", "scare",
73
+ "peppiest", "Pygmies", "Menzies", "wrist's", "enumerable", "housecoats",
74
+ "Khwarizmi's", "stampeding", "hungering", "steeping", "Yemenis",
75
+ "entangles", "solver", "mishapping", "Rand's", "ninety", "Boris",
76
+ "impedimenta", "predators", "ridge", "wretchedness's", "crapping", "Head",
77
+ "Edwards", "Claude's", "geodesics", "verities", "botch", "Short's",
78
+ "vellum's", "coruscates", "hydrogenates", "Haas's", "deceitfulness",
79
+ "cohort's", "Cepheus", "totes", "Cortez's", "napalm", "fruitcake",
80
+ "coordinated", "Coulomb", "desperation", "behoves", "contractor's",
81
+ "vacationed", "Wanamaker's", "leotard", "filtrated", "cringes", "Lugosi",
82
+ "sheath's", "orb", "jawed", "Isidro", "geophysics", "persons", "Asians",
83
+ "booze's", "eight's", "backslappers", "hankered", "dos", "helpings",
84
+ "tough", "interlarding", "gouger", "inflect", "Juneau's", "hay's",
85
+ "sardining", "spays", "Brandi", "depressant", "space", "assess",
86
+ "reappearance's", "Eli's", "Cote", "Enoch", "chants", "ruffianing",
87
+ "moralised", "unsuccessfully", "or", "Maryland's", "mildest", "unsafer",
88
+ "dutiful", "Pribilof", "teas", "vagued", "microbiologists", "hedgerow",
89
+ "speller's", "conservators", "catharsis", "drawbacks", "whooshed",
90
+ "unlawful", "revolve", "craftsmanship", "destabilise", "Margarito",
91
+ "Asgard's", "spawn's", "Annabel's", "canonicals", "buttermilk",
92
+ "exaltation's", "pothole", "reprints", "approximately", "homage",
93
+ "Wassermann's", "Atlantic's", "exacerbated", "Huerta", "keypunching",
94
+ "engagements", "dilate", "ponchos", "Helvetius", "Krakatoa", "basket's",
95
+ "stepmother", "schlock's", "drippings", "cardiology's", "northwesterly",
96
+ "cruddier", "poesies", "rustproof", "climb", "miscalled", "Belgians",
97
+ "Iago", "brownout", "nurseries", "hooliganism's", "concourse's",
98
+ "advocate", "sunrise's", "hyper", "octopus's", "erecting",
99
+ "counterattacking", "redesign", "studies", "nitrating", "milestone",
100
+ "bawls", "Nereid", "inferring", "Ontario's", "annexed", "treasury",
101
+ "cosmogony's", "scandalised", "shindig's", "detention's",
102
+ "Lollobrigida's", "eradicating", "magpie", "supertankers", "Adventist's",
103
+ "dozes", "Artaxerxes", "accumulate", "dankest", "telephony", "flows",
104
+ "Srivijaya's", "fourteen's", "antonym", "rancid", "briefing's",
105
+ "theologian", "Jacuzzi", "gracing", "chameleon's", "Brittney's",
106
+ "Pullmans", "Robitussin's", "jitterier", "mayonnaise's", "fort",
107
+ "closeouts", "amatory", "Drew's", "cockfight", "pyre", "Laura's",
108
+ "Bradley's", "obstructionists", "interventions", "tenderness's",
109
+ "loadstones", "castigation's", "undercut", "volubly", "meditated",
110
+ "Ypsilanti", "Jannie's", "tams", "drummer's", "inaugurations", "mawing",
111
+ "Anglophile", "Sherpa", "footholds", "Gonzalo", "removers",
112
+ "customisation", "procurement's", "allured", "grimaced", "captaining",
113
+ "liberates", "grandeur's", "Windsor", "screwdrivers", "Flynn's",
114
+ "extortionists", "carnivorous", "thinned", "panhandlers", "trust's",
115
+ "bemoaned", "untwisted", "cantors", "rectifies", "speculation",
116
+ "niacin's", "soppy", "condom", "halberd", "Leadbelly", "vocation's",
117
+ "tanners", "chanticleer", "secretariats", "Ecuador's", "suppurated",
118
+ "users", "slag's", "atrocity's", "pillar", "sleeveless", "bulldozers",
119
+ "turners", "hemline", "astounded", "rosaries", "Mallarmé", "crucifies",
120
+ "Maidenform", "contribution", "evolve", "chemicals", "uteri",
121
+ "expostulation", "roamers", "daiquiris", "arraignment", "ribs", "King's",
122
+ "Persepolis", "arsenic's", "blindfolds", "bloodsucker's", "restocks",
123
+ "falconry", "Olympia's", "Colosseum's", "vigils", "Louie's",
124
+ "unwillingly", "sealed", "potatoes", "Argentine", "audit's", "outworn",
125
+ "boggles", "likely", "alleging", "Tinkerbell", "redistribution's",
126
+ "Normandy", "Cortes", "porter's", "buntings", "cornucopias", "rosewoods",
127
+ "shelf's", "airdrops", "summits", "Rosalyn", "redecorating", "twirlers",
128
+ "monsters", "directed", "semiautomatics", "Foch", "Hobart", "mutilates",
129
+ "Wilma's", "ornamenting", "Clifford's", "pyromania", "Strasbourg",
130
+ "bleeders", "additions", "super", "effortlessly", "piecing", "vacations",
131
+ "gybes", "warranted", "Ting", "her", "histrionic", "marshaled", "spore's",
132
+ "villainy's", "brat", "confusion", "amphitheatre's", "adjourns",
133
+ "guzzled", "Visayans", "rogue's", "morsels", "candlestick", "flaks",
134
+ "Waterbury", "pulp's", "endorser's", "postdoc", "coffining", "swallowing",
135
+ "Wrangell", "Marcie's", "Marley", "untapped", "fear's", "Kant",
136
+ "pursuit's", "normally", "jackals", "orals", "Paramaribo's", "Marilyn's",
137
+ "Diem's", "narrower", "medicinally", "chickweed's", "pretentiousness",
138
+ "Lardner", "baritone's", "purrs", "Pam's", "pestles", "Philip's",
139
+ "Titania", "eccentrics", "Albion's", "greed's", "raggediest",
140
+ "importations", "Truman", "incentives", "typified", "incurred",
141
+ "bandstands", "Minnie's", "pleasant", "Sandy's", "perplexities",
142
+ "crease's", "obliques", "backstop", "Nair's", "perusing", "Quixote's",
143
+ "sicknesses", "vapour's", "butte", "lariats", "disfavours", "McGuffey",
144
+ "paediatric", "filtered", "whiff's", "gunboats", "devolved",
145
+ "extravaganza's", "organism", "giggling", "citadel's", "counterbalances",
146
+ "executrixes", "Cathay", "marshmallow's", "iniquitous", "Katmai", "Siva",
147
+ "welled", "impertinence's", "plunger", "rice", "forgers", "Larousse",
148
+ "pollution's", "medium", "residue's", "rumbas", "Odis", "arrogant",
149
+ "Jasper's", "panged", "doubted", "vistaing", "decibel's", "modulus's",
150
+ "chickpea's", "mugger's", "potentates", "sequesters", "academy's",
151
+ "Turk's", "pharmacology's", "defogger", "clomp", "soulless", "elastic",
152
+ "la's", "shards", "unfortunate", "counterclaim's", "objections", "towel",
153
+ "converged", "z", "ionisation", "stirrups", "antiquarians", "constructor",
154
+ "virtuosity's", "Göteborg", "centigramme's", "translators", "dalliance's",
155
+ "us", "bullfight", "drawer's", "nonconformist", "handcrafts", "Magritte",
156
+ "tulle", "plant's", "routine", "colour's", "latency's", "repertoire's",
157
+ "photocopies", "catalyse", "ashrams", "lagging", "flapjack's",
158
+ "ayatollahs", "decentest", "pitted", "conformity", "jack", "batsman",
159
+ "electrifies", "Unitarians", "obtain", "medicates", "tumour's",
160
+ "nutritionally", "haystack", "bustles", "slut", "satirising", "birettas",
161
+ "starring", "Kubrick's", "flogs", "chequering", "Menkalinan's",
162
+ "Barbados's", "Bioko", "swinish", "hades", "perjured", "timing's",
163
+ "cocaine", "ejecting", "rationalises", "dilettante's", "umping",
164
+ "capsized", "frogmen", "matt", "prostituting", "bola's", "devolution's",
165
+ "poxing", "Maritza's", "snob's", "scoped", "Costco", "feral", "sirocco",
166
+ "rebating", "truculence", "junkier", "nabs", "elicit", "allegiance",
167
+ "care", "arteriosclerosis's", "nonproliferation's", "doxologies",
168
+ "disconsolate", "bodega", "designers", "Rembrandt", "apostasies",
169
+ "garrulousness", "Hertzsprung's", "hayseeds", "noncooperation's",
170
+ "resentment", "cuticles", "sandboxes", "gimmicks", "magnolia",
171
+ "invalidity's", "pulverised", "Tinkerbell's", "hypoglycemics",
172
+ "gunboat's", "workbench's", "fleetingly's", "sportsman's", "trots",
173
+ "decomposes", "discrepancies", "owls", "obscener", "organic", "stoutness",
174
+ "councillor's", "Philippine's", "Aline", "coarsening", "suffocated",
175
+ "infighting's", "peculiarity", "roof's", "premier", "sucked", "churl",
176
+ "remounts", "intends", "wiles", "unfold", "unperturbed", "wainscotings",
177
+ "restfuller", "ashtray's", "wader's", "decanters", "gild", "tandems",
178
+ "spooked", "galling", "annuity's", "opacity", "clamour's", "flaccid",
179
+ "caroming", "savvying", "mammalian's", "toadstool's", "doohickey", "jibs",
180
+ "conquests", "dishes", "effusively", "distinctions", "curly", "Peckinpah",
181
+ "whining", "quasar", "sponge", "infrequent", "Novembers", "cowling",
182
+ "poem's", "muzzles", "Sufi", "authoritarians", "prompts", "Gavin's",
183
+ "morphology's", "shenanigan", "narrated", "rapprochement", "Heine",
184
+ "propane's", "addition", "prefect's", "pining", "dwindles",
185
+ "compulsiveness's", "objectors", "trudging", "segregates", "language",
186
+ "enthralled", "explosiveness", "toeing", "drainers", "Merrimack's",
187
+ "smarten", "bigwig's", "embroiders", "Medicaids", "grammar's", "behest's",
188
+ "chiseled", "equalled", "factual", "Casablanca's", "dams",
189
+ "disillusioned", "turtleneck", "Baden", "provinces", "bushwhacked", "fey",
190
+ "Yangtze", "loan's", "decent", "strobe", "challenger's", "hometown",
191
+ "Neal", "Ernestine's", "magnetises", "minute", "patrol", "Starbucks",
192
+ "Bernstein", "signal", "interplanetary", "tweak", "archdeacon",
193
+ "untoward", "transducer", "azaleas", "levied", "worlds", "talks",
194
+ "Tancred", "hairsplitting's", "edibility's", "confab", "rosetted",
195
+ "Spanish", "Americanisation", "Charley", "realm's", "incongruities",
196
+ "chinstraps", "dollhouses", "binocular", "popgun", "physiotherapy's",
197
+ "knave's", "angelically", "heartbreaking", "clarions", "bespeaks",
198
+ "pivotal", "Zosma", "ungrammatical", "dilution", "tidily", "Dejesus's",
199
+ "taller", "pennyweight's", "freshman", "Jamestown", "chiefer", "amen",
200
+ "attiring", "appurtenance's", "opiates", "mottoes", "towellings", "ashen",
201
+ "font's", "spoors", "pupil", "groom's", "skimpy", "achieves",
202
+ "intolerance's", "ardour's", "exorcist", "bottoming", "snag's",
203
+ "Frenches", "hysteric's", "ladyfinger's", "differences", "seed",
204
+ "clubfoot's", "glades", "Elton's", "jargon", "Waldo", "grinning",
205
+ "coherence's", "winos", "turnround", "appended", "Ethelred's", "delete",
206
+ "steadfastness's", "miss", "thermoplastic", "depraves", "unctuous",
207
+ "reanimates", "transfusing", "protects", "Babbage's", "foists", "inn",
208
+ "etched", "sanctimoniously", "idling", "timepiece", "holistic",
209
+ "waterside", "ulna's", "swindled", "employables", "zebra", "nieces",
210
+ "pertained", "usages", "vamp's", "Larry's", "cooler's", "holographs",
211
+ "clewing", "stubborning", "peaked", "underfeeds", "marshmallows",
212
+ "agreeable", "beards", "Slovenia's", "nitroglycerin", "palls", "impurer",
213
+ "armours", "stomachaches", "notification's", "Dixieland's", "crozier's",
214
+ "neurotic", "kudos", "Tania's", "M", "soundtrack's", "territory's",
215
+ "sped", "house's", "divisibility", "ingress's", "pummelled", "Isabel",
216
+ "Dewitt", "seemly", "hutched", "calliope", "lengthwise", "flubs",
217
+ "Moldavia's", "Mercia", "McBride's", "Lenten", "pulverise", "football",
218
+ "oligarchy", "Max", "scribbler", "acclimatize", "brainwashes",
219
+ "apprenticed", "benevolences", "two", "Wodehouse", "crew's", "massacre",
220
+ "proportionals", "Jewishness's", "instep's", "emissary", "folder",
221
+ "nonentity's", "convinced", "caption", "kangarooed", "dogie",
222
+ "vagabonding", "auction's", "appraising", "antimony", "part's",
223
+ "longitude's", "inconsiderateness's", "pawning", "serer", "solos",
224
+ "histories", "mushy", "parturition", "munched", "oregano", "inanest",
225
+ "dryness", "kitchenware", "unexpected", "covens", "cheesecakes",
226
+ "stakeout's", "Pulaski's", "Yoknapatawpha's", "pinhead", "drifted",
227
+ "guzzler's", "funking", "sou'wester", "oesophagus's", "highbrow",
228
+ "contralto", "meningitis", "Mazzini", "raggedest", "vaginas", "misfiring",
229
+ "margaritas", "wedder", "pointed", "slicked", "garlanded", "comeuppances",
230
+ "vassals", "Sui", "Concord", "bozos", "Garry's", "Maribel's", "epileptic",
231
+ "Jehoshaphat's", "revolutionary's", "kneecaps", "songbird", "actively",
232
+ "Meredith", "toddler", "distrusting", "fuchsias", "perusal", "instills",
233
+ "deathbed", "sunspot's", "spatula's", "Muscovy", "humaniser", "Keats",
234
+ "regrets", "deflect", "theories", "nonpluses", "populating", "leniency's",
235
+ "penicillin's", "gaol's", "borough", "moose's", "dogmata",
236
+ "transcendentally", "supposition's", "nursed", "Gagarin's", "honest",
237
+ "Chandrasekhar's", "mudslinger's", "parable", "bonged", "Wyeth's",
238
+ "Ochoa's", "Grenoble", "steamy", "halter's", "rotisserie's", "pagoda's",
239
+ "wallaby's", "Yank", "pretzel", "rapist's", "estrange", "hectored",
240
+ "Puebla's", "conniver", "creditor's", "dole's", "Fotomat", "patents",
241
+ "heckling", "thickener", "etches", "yogi", "hemstitched", "obverses",
242
+ "Lipizzaner", "divert", "Strong's", "sagest", "Alabama", "He", "Carrie's",
243
+ "obligation's", "verity's", "outed", "Rhee", "bluffed", "codas",
244
+ "crèche's", "unpalatable", "dilettanti", "vestment", "purse's",
245
+ "inflammation's", "bookmarked", "doing's", "whinnying", "impersonators",
246
+ "Theiler", "scurried", "resistor", "southerners", "Anacreon",
247
+ "reconstruction's", "footage", "trespassing", "Kafka", "bottling",
248
+ "stays", "Gretzky", "overburdening", "princesses", "weathercock's",
249
+ "atolls", "cheerier", "packet", "surrenders", "teacup", "Sabik's",
250
+ "undecidable", "lollygagged", "pawl's", "anaesthesiology", "sublimely",
251
+ "contortionists", "motorcades", "Maureen", "lamasery", "yourselves",
252
+ "Creighton", "poliomyelitis's", "civil", "outmanoeuvre", "lauded",
253
+ "closeness", "Humboldt's", "pretzels", "ungrudging", "blackguard's",
254
+ "sickles", "typo", "narcotics", "linesman", "psychotics", "pictured",
255
+ "deviltry", "Yahtzee", "Lovelace's", "cerebra", "airiness's", "bewitch",
256
+ "how", "motherland's", "crate's", "Keenan's", "turnstile's",
257
+ "pedometer's", "carted", "slipping", "fallow", "Canadian", "ladybird's",
258
+ "thump", "shopper's", "enters", "scowls", "nematode", "focused",
259
+ "Riley's", "grainiest", "novas", "snuffled", "leftovers", "deify",
260
+ "Samoan", "pruning", "contenting", "Khachaturian's", "triads",
261
+ "genealogies", "psalmist", "shaming", "appropriated", "ignominies",
262
+ "Beadle's", "MHz", "peerages", "facile", "Seoul", "Janna's", "jig's",
263
+ "mousiness's", "funnier", "delimiter", "watermark", "sheik's", "Reasoner",
264
+ "ipecac's", "curdles", "wronged", "Segovia's", "solders", "Dunne's",
265
+ "contractor", "awards", "hostels", "pinkie's", "Herzl", "misplace",
266
+ "shuttle", "innovative", "vestries", "cosmoses", "trikes", "Casandra's",
267
+ "hokier", "carouser's", "summerhouses", "renascence", "decomposed",
268
+ "Balzac's", "outlast", "shod", "squalling", "smugging", "weighing",
269
+ "omega's", "selects", "fleetingly", "Finland", "petted", "disrespects",
270
+ "fetter", "confound", "brads", "Bosnia's", "preposition's", "guy's",
271
+ "different", "tracts", "paediatrics's", "polygon", "eyetooth's", "Aesop",
272
+ "pentagons", "professions", "homeowner", "looter's", "intimidated",
273
+ "lustre's", "loneliness", "catnapped", "counties", "pailful",
274
+ "Christendom's", "Barents", "penis", "Mumford's", "Nigel", "éclairs",
275
+ "splats", "diabolical", "popularly", "quart", "abjected", "Rasalgethi",
276
+ "camel's", "inimical", "overweening", "distention's", "Advil", "casement",
277
+ "seamier", "avaricious", "sierra's", "caparison's", "moldered", "Cortez",
278
+ "handmaid's", "disappointment", "billowed", "overpopulated", "outsets",
279
+ "ray", "smoother", "overkill", "somber", "tiller's", "zigzag", "adviser",
280
+ "absorption's", "sturdily", "hairy", "bloodmobile", "investiture's",
281
+ "creature", "ripeness's", "Jonathon", "arborvitae's", "skulduggery",
282
+ "bog", "skeleton's", "Kit's", "Panamas", "Ashlee's", "jazzy", "snit",
283
+ "divisive", "caribous", "permuting", "frankest", "annotated", "oak's",
284
+ "meg's", "Gill", "burrito", "dormancy's", "offings", "Nike",
285
+ "outnumbered", "skater's", "Portugal", "deficit", "Cannon's", "pockmark",
286
+ "sediment's", "mailbox", "innuendoed", "retire", "wolfhound's",
287
+ "nicotine's", "brigade's", "mettle's", "softhearted", "hooey's",
288
+ "abdication", "Orval", "Jaime", "ship", "hyphenations", "sectarians",
289
+ "Alabaman", "tagging", "ultras", "schizoids", "medicines", "undersized",
290
+ "Gray", "maternity's", "bandaging", "scooping", "coercion's", "serapes",
291
+ "celebrate", "Listerine's", "throve", "crypt's", "nearsighted",
292
+ "metallurgists", "Delicious", "cotton's", "yoked", "cogitates",
293
+ "underage", "cigarette's", "hallways", "Cointreau", "ma'am", "spacing's",
294
+ "foresight", "parkway's", "Edwardian", "mediator", "Turner", "Derrida's",
295
+ "motorist's", "hobo", "equivalences", "sophism", "peeping", "telescoped",
296
+ "overproduce", "ductility", "Leblanc", "refractory", "passé", "decodes",
297
+ "womanising", "flax's", "pond's", "infrequency", "talkativeness's",
298
+ "settlement's", "Prince", "bating", "multimillionaire", "Schultz",
299
+ "premiss", "quackery", "bathhouse", "Leno's", "Monday's", "Hung's",
300
+ "undaunted", "bewaring", "tension's", "Chile's", "Rostand's", "platoons",
301
+ "rodeo's", "Dionne", "Dyson's", "gingivitis's", "fewer",
302
+ "electromagnetism's", "scrubbier", "ensconced", "wretcheder", "mica's",
303
+ "expectorant", "snapper's", "chastised", "habitation", "spry", "bathing",
304
+ "stealth's", "champagnes", "baleful", "fencing's", "threaded", "codicils",
305
+ "disgraced", "redcaps", "addends", "Olivier", "clasped", "Gwendolyn",
306
+ "foment", "angularity's", "strenuously", "gorilla", "misbehaved",
307
+ "surplus's", "newsier", "positioned", "bloodmobiles", "circumstantials",
308
+ "person's", "varicose", "Calliope", "plethora", "Olmsted",
309
+ "reconciliation", "Brendan's", "beset", "totters", "sailors",
310
+ "parliamentarians", "Whitaker", "hilts", "pummelling", "academician's",
311
+ "ruse", "discreeter", "appetisingly", "perfections", "anus", "overrode",
312
+ "pedantry's", "possessed", "germs", "unscrews", "expired",
313
+ "semitrailer's", "Cupid's", "nonsmoker", "Marathon", "secs", "Hopkins",
314
+ "freeing", "libelled", "furious", "staccatos", "electroencephalogram's",
315
+ "malingerer's", "impulses", "briars", "Tran", "hilltops", "sulks",
316
+ "quailed", "fads", "retrenches", "spouted", "outtake", "puncture's",
317
+ "rats", "kibitzed", "berets", "omnivorous", "flange", "Mons", "glints",
318
+ "mansards", "thou", "cuing", "suspected", "Kaiser's", "savvier", "skits",
319
+ "interdict's", "Booker", "Rubinstein", "Tm's", "crossing's", "dewlap",
320
+ "guarantor's", "edification's", "joyfullest", "crossed", "chowdering",
321
+ "sillier", "reloading", "commodity's", "bodkins", "conduced", "coughs",
322
+ "nucleus's", "sixtieth", "proverbially", "comprehensive", "ineluctably",
323
+ "patrolmen", "resuscitating", "carpetbag's", "Darrin's", "Yeager",
324
+ "Bataan's", "spoonsful", "proceeds", "wrongdoer", "Karroo", "heart",
325
+ "poison", "typifying", "endowment's", "aquanauts", "deaconesses",
326
+ "homosexuality", "Maxine", "haunching", "centred", "Peking's",
327
+ "toothiest", "growers", "firebombs", "throbs", "Downy", "contribution's",
328
+ "sago's", "Cole", "Knoxville", "leftmost", "Nell's", "Baffin", "barrings",
329
+ "contagions", "disencumbers", "countdown", "quintuple", "perihelion",
330
+ "creationism's", "actioning", "admiralty", "Mt's", "durability's",
331
+ "sewer's", "replicas", "oxide", "ripened", "Pisces's", "Cinerama's",
332
+ "catheters", "oppressive", "roosting", "foggiest", "properly", "Kareem",
333
+ "Ollie", "minuted", "vehicles", "eel", "remunerates", "swashbuckler's",
334
+ "remunerative", "sanguining", "Belem's", "forlornly", "rudders",
335
+ "officialdom", "countertenors", "Upton", "whoop", "animations", "arouses",
336
+ "millionths", "videocassette", "fledgling", "shake", "exterminated",
337
+ "Cain's", "trendiest", "wariest", "torpedoes", "airmails", "Cameron's",
338
+ "discord's", "spitefulness's", "thudded", "menaced", "takeovers",
339
+ "solicited", "wallpapers", "economic", "cache", "rechargeable", "gongs",
340
+ "droning", "exemption", "Alaskans", "toothed", "snifter", "Stephens",
341
+ "prejudge", "doctor's", "bobolinks", "rotates", "valuation's", "narrator",
342
+ "weaning", "uncle", "shelter", "destitution's", "Edgardo's", "gauge",
343
+ "Nice", "Adolf's", "rheumatics", "inheritances", "undesirables",
344
+ "Eileen's", "flyweight's", "scope", "possessiveness", "tipsily",
345
+ "effulgence", "rematch", "Baltic", "unsteadiest", "rodeos", "gloaming's",
346
+ "ringers", "randomised", "commissars", "destroyer's", "router",
347
+ "disengaging", "it's", "Albert", "rampantly", "varmint", "Adkins",
348
+ "chevron", "insomniac", "bobsledded", "masochist's", "chronometers",
349
+ "compaction", "Mauro", "sidled", "Highlander's", "snail's", "syllabifies",
350
+ "application's", "symmetrical", "blacking", "accent's", "sentimentalists",
351
+ "sonatas", "profanities", "sloping", "Araby", "percolate", "repeated",
352
+ "youthfulness's", "Loyola", "deliriously", "matriarch's", "tailors",
353
+ "rerouting", "hairpin", "dispersal", "endowment", "disquieting", "swat",
354
+ "neckerchieves", "wrinkles", "amoebas", "Darcy", "orthodontics's",
355
+ "milder", "sneezing", "prescience's", "pads", "wrought", "perspicuity's",
356
+ "materialist", "pull", "laundryman's", "lazily", "protractor's", "Vic",
357
+ "photocopier", "guardrooms", "cablecasting", "confirms", "excretions",
358
+ "combatant", "counterfeiters", "periwig", "genteelest", "router's",
359
+ "springy", "procreated", "syphon", "parent's", "bigwigs", "rebelled",
360
+ "milkmaids", "McGee's", "seaworthier", "Bellatrix's", "tenement",
361
+ "embryologists", "Vaselining", "burrow's", "tonnage's", "Petty's",
362
+ "chancels", "scouring", "mouser", "recompensed", "guarding", "editor",
363
+ "raster", "bourgeoisie's", "interpolating", "skinflint's", "transport",
364
+ "bullfinch", "needlessly", "withholds", "counterclockwise", "panicking",
365
+ "Ahriman", "flambeing", "contrary", "heartstrings", "whittled", "crib's",
366
+ "highlighter", "extroverted", "Martinique's", "racquets", "Maldivian",
367
+ "physiognomy", "Hammarskjold", "massage", "shingling", "neighbourhood",
368
+ "boobed", "vulture", "intercontinental", "cobblers", "peddlers",
369
+ "forthrightly", "germicide", "raindrop's", "fir's", "decaffeinates",
370
+ "wobblier", "abnegated", "cruiser's", "satiety", "trilled", "impending",
371
+ "gulf", "mountebank", "beltway", "reappointment", "cinematographer",
372
+ "pylon", "penthouses", "morally", "installs", "Walsh's", "drawstring",
373
+ "circus's", "Khayyam's", "Myrtle's", "ventrals", "category's",
374
+ "opportunistic", "grovelling", "warier", "upchuck", "hairdresser's",
375
+ "Montanans", "jobber", "dazzle", "encirclement's", "muffin's", "coronets",
376
+ "focus's", "footfall's", "subjunctives", "late", "pedagogued",
377
+ "dignitaries", "content", "blockbusters", "reminiscent", "mayor",
378
+ "specifier", "extinction", "nutshell's", "catbird's", "bundle",
379
+ "gracefulness", "exceed", "estranges", "chancy", "bankrupted", "Avery",
380
+ "Barnett", "succulence", "stacking", "ensnare", "truck", "embargo",
381
+ "persecutes", "translation's", "muskrat's", "illumines", "undercoat's",
382
+ "fleecier", "brick", "qualities", "imprecision", "reprisals", "discounts",
383
+ "harmonics", "Mann's", "terrorism", "interminable", "Santiago's",
384
+ "deepness", "tramples", "golder", "voyeurism's", "tent", "particle's",
385
+ "minuend", "waxwings", "knobby", "trustee", "funnily", "hotheadedness's",
386
+ "Kristin", "what", "bite", "murmur's", "pustule's", "weeknights",
387
+ "rocked", "athlete", "ventilates", "impresses", "daguerreotyping",
388
+ "Gross", "gambols", "villa", "maraud", "disapproval", "apostrophe's",
389
+ "sheaf", "noisemaker's", "autonomy's", "massing", "daemon's", "Thackeray",
390
+ "fermenting", "whammy", "philosophise", "empathy", "calamities",
391
+ "sunbathe", "Qom", "yahoo's", "coxcomb's", "move", "school's",
392
+ "rainmakers", "shipwreck", "potbelly's", "courageously", "current",
393
+ "Aleut", "treaties", "U", "always", "Bosch", "impregnating", "bud's",
394
+ "carat", "centrists", "acquaintance's", "convoy's", "chichis",
395
+ "restraint's", "Cosby", "factotums", "handshaking", "paragon's",
396
+ "mileages", "Tammie", "cartoonists", "lemmas", "lowliness's", "onion's",
397
+ "E's", "Bible", "Cranmer", "fob's", "minks", "overstocking", "Willamette",
398
+ "needle's", "scuppers", "Carborundum", "upwardly", "tallies", "aptitude",
399
+ "synod", "nasturtium's", "Pensacola", "snappish", "merino", "sups",
400
+ "fingerboard's", "prodigy's", "narcissism's", "substantial", "lug",
401
+ "establishing", "Vergil's", "patrimonies", "shorted", "forestation",
402
+ "undeniable", "Katmandu", "lamination", "trollop's", "odd", "stanza",
403
+ "paraplegic", "melanin", "Rico", "foreman", "stereotypes", "affinity's",
404
+ "cleansing", "sautéing", "epochs", "crooners", "manicured", "undisclosed",
405
+ "propel", "usage", "Alioth's", "Aurelia's", "peruse", "Vassar's",
406
+ "Demosthenes's", "Brazos", "supermarket", "scribbles", "Jekyll's",
407
+ "discomfort's", "mastiffs", "ballasting", "Figueroa", "turnstiles",
408
+ "convince", "Shelton's", "Gustavo", "shunting", "Fujitsu's", "fining's",
409
+ "hippos", "dam's", "expressionists", "peewee", "troop's"
410
+ ]
411
+ WORDS_SIZE = WORDS.size
412
+
413
# Picks one entry of WORDS at a random position.
#
# Returns the element of WORDS found at an index drawn with
# rand(WORDS_SIZE).
def random_word
  index = rand(WORDS_SIZE)
  WORDS[index]
end
416
+
417
# Builds a string of random words.
#
# max_len:: upper bound passed to rand; for a positive Integer the sentence
#           contains between 1 and max_len words.
#
# Every word, including the first, is preceded by a single space, so the
# result always starts with " ".
#
# Returns a new, unfrozen String.
def random_sentence(max_len)
  # Start from an explicit unfrozen copy rather than appending to a string
  # literal, so the method keeps working when string literals are frozen.
  sentence = "".dup
  word_count = 1 + rand(max_len)
  word_count.times { sentence << " " << random_word }
  sentence
end
422
+
423
# Generates a random document as a Hash.
#
# max_fields::   bound for rand when choosing how many fields to generate
# max_elements:: bound for rand when choosing how many values a field holds
# max_len::      forwarded to random_sentence for every generated value
#
# Field names are random words converted to Symbols; drawing the same word
# twice simply overwrites the earlier entry. A field with exactly one value
# stores that sentence directly, otherwise it stores an Array of sentences.
#
# Returns the populated Hash.
def random_doc(max_fields = 10, max_elements = 10, max_len = 100)
  document = {}
  field_total = 1 + rand(max_fields)
  field_total.times do
    key = random_word.intern
    value_count = rand(max_elements) + 1
    document[key] =
      if value_count == 1
        random_sentence(max_len)
      else
        value_count.times.map { random_sentence(max_len) }
      end
  end
  document
end
437
+ end
@@ -0,0 +1,315 @@
1
+ module IndexTestHelper
2
+ include Ferret::Index
3
+ include Ferret::Analysis
4
+ include Ferret::Search
5
+
6
# Builds a binary String of +size+ bytes whose byte values count upwards
# from 0 and wrap around after 255 (byte i holds i % 256).
#
# size:: number of bytes to generate
#
# Returns the packed String.
def self.make_binary(size)
  Array.new(size) { |i| i % 256 }.pack("c*")
end

# Binary fixtures used as field values by prepare_document.
BINARY_DATA = make_binary(256)
COMPRESSED_BINARY_DATA = make_binary(56)
14
+
15
# Builds a single fixture document together with the field definitions
# that describe it.
#
# dir:: never read by this method; kept so existing call sites that pass
#       an argument continue to work.
#
# Nine fields are registered on a new FieldInfos with differing :store,
# :index and :term_vector options, and the document Hash supplies one value
# for each of them. The two binary fields take their values from
# BINARY_DATA and COMPRESSED_BINARY_DATA.
#
# Returns a two-element Array: [doc, fis].
def self.prepare_document(dir)
  fis = FieldInfos.new
  fis.add_field(:text_field1, :term_vector => :no)
  fis.add_field(:text_field2)
  fis.add_field(:key_field, :index => :untokenized)
  fis.add_field(:unindexed_field, :index => :no)
  fis.add_field(:unstored_field1, :store => :no, :term_vector => :no)
  fis.add_field(:unstored_field2, :store => :no, :term_vector => :yes)
  fis.add_field(:compressed_field, :store => :compressed, :term_vector => :yes)
  fis.add_field(:binary_field, :index => :no, :term_vector => :no)
  fis.add_field(:compressed_binary_field,
                :store => :compressed, :index => :no, :term_vector => :no)

  doc = {
    :text_field1             => "field one text",
    :text_field2             => "field field field two text",
    :key_field               => "keyword",
    :unindexed_field         => "unindexed field text",
    :unstored_field1         => "unstored field text one",
    :unstored_field2         => "unstored field text two",
    :compressed_field        => "compressed text",
    :binary_field            => BINARY_DATA,
    :compressed_binary_field => COMPRESSED_BINARY_DATA
  }

  [doc, fis]
end
40
+
41
# Builds a small list of fruit documents.
#
# Returns an Array of eight Hashes, each with the String keys "name" and
# "colour", in the order listed below.
def self.prepare_documents
  [
    ["apple",    "green"],
    ["apple",    "red"],
    ["orange",   "orange"],
    ["grape",    "green"],
    ["grape",    "purple"],
    ["mandarin", "orange"],
    ["peach",    "orange"],
    ["apricot",  "orange"]
  ].map { |name, colour| { "name" => name, "colour" => colour } }
end
53
+
54
# Builds a list of book documents.
#
# Returns an Array of 37 Hashes, each with the String keys "author",
# "title" and "year" (all values are Strings), in the order listed below.
def self.prepare_book_list
  [
    ["P.H. Newby",           "Something To Answer For",        "1969"],
    ["Bernice Rubens",       "The Elected Member",             "1970"],
    ["V. S. Naipaul",        "In a Free State",                "1971"],
    ["John Berger",          "G",                              "1972"],
    ["J. G. Farrell",        "The Siege of Krishnapur",        "1973"],
    ["Stanley Middleton",    "Holiday",                        "1974"],
    ["Nadine Gordimer",      "The Conservationist",            "1974"],
    ["Ruth Prawer Jhabvala", "Heat and Dust",                  "1975"],
    ["David Storey",         "Saville",                        "1976"],
    ["Paul Scott",           "Staying On",                     "1977"],
    ["Iris Murdoch",         "The Sea",                        "1978"],
    ["Penelope Fitzgerald",  "Offshore",                       "1979"],
    ["William Golding",      "Rites of Passage",               "1980"],
    ["Salman Rushdie",       "Midnight's Children",            "1981"],
    ["Thomas Keneally",      "Schindler's Ark",                "1982"],
    ["J. M. Coetzee",        "Life and Times of Michael K",    "1983"],
    ["Anita Brookner",       "Hotel du Lac",                   "1984"],
    ["Keri Hulme",           "The Bone People",                "1985"],
    ["Kingsley Amis",        "The Old Devils",                 "1986"],
    ["Penelope Lively",      "Moon Tiger",                     "1987"],
    ["Peter Carey",          "Oscar and Lucinda",              "1988"],
    ["Kazuo Ishiguro",       "The Remains of the Day",         "1989"],
    ["A. S. Byatt",          "Possession",                     "1990"],
    ["Ben Okri",             "The Famished Road",              "1991"],
    ["Michael Ondaatje",     "The English Patient",            "1992"],
    ["Barry Unsworth",       "Sacred Hunger",                  "1992"],
    ["Roddy Doyle",          "Paddy Clarke Ha Ha Ha",          "1993"],
    ["James Kelman",         "How Late It Was, How Late",      "1994"],
    ["Pat Barker",           "The Ghost Road",                 "1995"],
    ["Graham Swift",         "Last Orders",                    "1996"],
    ["Arundati Roy",         "The God of Small Things",        "1997"],
    ["Ian McEwan",           "Amsterdam",                      "1998"],
    ["J. M. Coetzee",        "Disgrace",                       "1999"],
    ["Margaret Atwood",      "The Blind Assassin",             "2000"],
    ["Peter Carey",          "True History of the Kelly Gang", "2001"],
    ["Yann Martel",          "The Life of Pi",                 "2002"],
    ["DBC Pierre",           "Vernon God Little",              "2003"]
  ].map do |author, title, year|
    { "author" => author, "title" => title, "year" => year }
  end
end
169
+
170
# Builds the field definitions used with the index-reader test documents.
#
# Registers the fields :body, :changing_field, :title, :author, :year and
# :text on a new FieldInfos, each with the options shown below.
#
# Returns the FieldInfos object itself. It is returned explicitly so the
# result does not depend on what the final add_field call happens to
# return.
def self.prepare_ir_test_fis
  fis = FieldInfos.new
  fis.add_field(:body)
  fis.add_field(:changing_field, :term_vector => :no)
  fis.add_field(:title, :index => :untokenized, :term_vector => :with_offsets)
  fis.add_field(:author, :term_vector => :with_positions)
  fis.add_field(:year, :index => :no, :term_vector => :no)
  fis.add_field(:text, :store => :no, :term_vector => :no)
  fis
end
179
+
180
# Total number of documents produced by prepare_ir_test_docs.
INDEX_TEST_DOC_COUNT = 64

# Builds the document list used by the index-reader tests.
#
# Documents 0..21 are hand-written Hashes keyed by Symbol. Every remaining
# document, up to index INDEX_TEST_DOC_COUNT - 1, has a single :text field
# holding the word "skip " repeated once per document index (document 22
# has 22 repetitions, document 23 has 23, and so on).
#
# Returns the Array of document Hashes.
def self.prepare_ir_test_docs
  docs = [
    { # 0
      :body => "Where is Wally",
      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
                         "word3 word3"
    },
    { :body => "Some Random Sentence read" },                 # 1
    { :body => "Some read Random Sentence read" },            # 2
    { # 3
      :title => "War And Peace",
      :body => "word3 word4 word1 word2 word1 word3 word4 word1 word3 word3",
      :author => "Leo Tolstoy",
      :year => "1865",
      :text => "more text which is not stored"
    },
    { :body => "Some Random Sentence" },                      # 4
    { :body => "Here's Wally" },                              # 5
    { :body => "Some Random Sentence read read read read" },  # 6
    { :body => "Some Random Sentence" },                      # 7
    { :body => "Some Random Sentence" },                      # 8
    { # 9
      :body => "read Some Random Sentence read this will be used after " +
               "unfinished next position read"
    },
    { # 10
      :body => "Some read Random Sentence",
      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
                         "word3 word3"
    },
    { :body => "And here too. Well, maybe Not" },             # 11
    { :body => "Some Random Sentence" },                      # 12
    { :body => "Some Random Sentence" },                      # 13
    { :body => "Some Random Sentence" },                      # 14
    { :body => "Some Random Sentence" },                      # 15
    { :body => "Some Random read read Sentence" },            # 16
    { # 17
      :body => "Some Random read Sentence",
      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
                         "word3 word3"
    },
    { :body => "Wally Wally Wally" },                         # 18
    { # 19
      :body => "Some Random Sentence",
      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
                         "word3 word3"
    },
    { # 20
      :body => "Wally is where Wally usually likes to go. Wally Mart! Wally " +
               "likes shopping there for Where's Wally books. Wally likes " +
               "to read",
      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
                         "word3 word3"
    },
    { # 21
      :body => "Some Random Sentence read read read and more read read read",
      :changing_field => "word3 word4 word1 word2 word1 word3 word4 word1 " +
                         "word3 word3"
    }
  ]

  # Filler documents: each gets its own freshly built string, so no string
  # literal is mutated and no two documents share a String object.
  22.upto(INDEX_TEST_DOC_COUNT - 1) do |i|
    docs[i] = { :text => "skip " * i }
  end

  docs
end
277
+
278
# Index-reader fixtures, built once while the module body is evaluated:
# the document list and the field definitions that go with it.
INDEX_TEST_DOCS = prepare_ir_test_docs
INDEX_TEST_FIS = prepare_ir_test_fis
280
+
281
# Builds the documents used by the search tests.
#
# Each row below supplies the :date, :category and :field values of one
# document. The document for the row at position n (counting from 1) is
# created with Ferret::Document.new(n) -- presumably n is the document
# boost; confirm against Ferret::Document.
#
# Returns an Array of 18 Ferret::Document objects in row order.
def self.prepare_search_docs
  rows = [
    ["20050930", "cat1/",             "word1"],
    ["20051001", "cat1/sub1",         "word1 word2 the quick brown fox"],
    ["20051002", "cat1/sub1/subsub1", "word1 word3"],
    ["20051003", "cat1/sub2",         "word1 word3"],
    ["20051004", "cat1/sub2/subsub2", "word1 word2"],
    ["20051005", "cat2/sub1",         "word1"],
    ["20051006", "cat2/sub1",         "word1 word3"],
    ["20051007", "cat2/sub1",         "word1"],
    ["20051008", "cat2/sub1",         "word1 word2 word3 the fast brown fox"],
    ["20051009", "cat3/sub1",         "word1"],
    ["20051010", "cat3/sub1",         "word1"],
    ["20051011", "cat3/sub1",         "word1 word3 the quick red fox"],
    ["20051012", "cat3/sub1",         "word1"],
    ["20051013", "cat1/sub2",         "word1"],
    ["20051014", "cat1/sub1",         "word1 word3 the quick hairy fox"],
    ["20051015", "cat1/sub2/subsub1", "word1"],
    ["20051016", "cat1/sub1/subsub2",
     "word1 the quick fox is brown and hairy and a little red"],
    ["20051017", "cat1/",
     "word1 the brown fox is quick and red"]
  ]

  rows.each_with_index.map do |(date, category, field), index|
    # index is zero-based; the constructor argument starts at 1.
    doc = Ferret::Document.new(index + 1)
    doc[:date] = date
    doc[:category] = category
    doc[:field] = field
    doc
  end
end
313
+
314
# Search fixture documents, built once while the module body is evaluated.
SEARCH_TEST_DOCS = prepare_search_docs
315
+ end