sdsykes-ferret 0.11.6.19
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +24 -0
- data/MIT-LICENSE +20 -0
- data/README +102 -0
- data/Rakefile +338 -0
- data/TODO +17 -0
- data/TUTORIAL +231 -0
- data/bin/ferret-browser +79 -0
- data/ext/analysis.c +1555 -0
- data/ext/analysis.h +219 -0
- data/ext/api.c +69 -0
- data/ext/api.h +27 -0
- data/ext/array.c +123 -0
- data/ext/array.h +53 -0
- data/ext/bitvector.c +540 -0
- data/ext/bitvector.h +272 -0
- data/ext/compound_io.c +383 -0
- data/ext/config.h +42 -0
- data/ext/document.c +156 -0
- data/ext/document.h +53 -0
- data/ext/except.c +120 -0
- data/ext/except.h +168 -0
- data/ext/extconf.rb +14 -0
- data/ext/ferret.c +402 -0
- data/ext/ferret.h +91 -0
- data/ext/filter.c +156 -0
- data/ext/fs_store.c +483 -0
- data/ext/global.c +418 -0
- data/ext/global.h +117 -0
- data/ext/hash.c +567 -0
- data/ext/hash.h +473 -0
- data/ext/hashset.c +170 -0
- data/ext/hashset.h +187 -0
- data/ext/header.h +58 -0
- data/ext/helper.c +62 -0
- data/ext/helper.h +13 -0
- data/ext/inc/lang.h +48 -0
- data/ext/inc/threading.h +31 -0
- data/ext/index.c +6425 -0
- data/ext/index.h +961 -0
- data/ext/lang.h +66 -0
- data/ext/libstemmer.c +92 -0
- data/ext/libstemmer.h +79 -0
- data/ext/mempool.c +87 -0
- data/ext/mempool.h +35 -0
- data/ext/modules.h +162 -0
- data/ext/multimapper.c +310 -0
- data/ext/multimapper.h +51 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +151 -0
- data/ext/priorityqueue.h +143 -0
- data/ext/q_boolean.c +1608 -0
- data/ext/q_const_score.c +161 -0
- data/ext/q_filtered_query.c +209 -0
- data/ext/q_fuzzy.c +268 -0
- data/ext/q_match_all.c +148 -0
- data/ext/q_multi_term.c +677 -0
- data/ext/q_parser.c +2825 -0
- data/ext/q_phrase.c +1126 -0
- data/ext/q_prefix.c +100 -0
- data/ext/q_range.c +350 -0
- data/ext/q_span.c +2402 -0
- data/ext/q_term.c +337 -0
- data/ext/q_wildcard.c +171 -0
- data/ext/r_analysis.c +2575 -0
- data/ext/r_index.c +3472 -0
- data/ext/r_qparser.c +585 -0
- data/ext/r_search.c +4105 -0
- data/ext/r_store.c +513 -0
- data/ext/r_utils.c +963 -0
- data/ext/ram_store.c +471 -0
- data/ext/search.c +1741 -0
- data/ext/search.h +885 -0
- data/ext/similarity.c +150 -0
- data/ext/similarity.h +82 -0
- data/ext/sort.c +983 -0
- data/ext/stem_ISO_8859_1_danish.c +338 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.c +635 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_english.c +1156 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.c +792 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_french.c +1276 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_german.c +512 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_italian.c +1091 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.c +296 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_porter.c +776 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.c +1119 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.c +307 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_KOI8_R_russian.c +701 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_UTF_8_danish.c +344 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_dutch.c +653 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_english.c +1176 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_finnish.c +808 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_french.c +1296 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_german.c +526 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_italian.c +1113 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_norwegian.c +302 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_porter.c +794 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_portuguese.c +1055 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_russian.c +709 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_spanish.c +1137 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_swedish.c +313 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stopwords.c +401 -0
- data/ext/store.c +692 -0
- data/ext/store.h +777 -0
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/utilities.c +446 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +29 -0
- data/lib/ferret/browser.rb +246 -0
- data/lib/ferret/browser/s/global.js +192 -0
- data/lib/ferret/browser/s/style.css +148 -0
- data/lib/ferret/browser/views/document/list.rhtml +49 -0
- data/lib/ferret/browser/views/document/show.rhtml +27 -0
- data/lib/ferret/browser/views/error/index.rhtml +7 -0
- data/lib/ferret/browser/views/help/index.rhtml +8 -0
- data/lib/ferret/browser/views/home/index.rhtml +29 -0
- data/lib/ferret/browser/views/layout.rhtml +22 -0
- data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
- data/lib/ferret/browser/views/term/index.rhtml +199 -0
- data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
- data/lib/ferret/browser/webrick.rb +14 -0
- data/lib/ferret/document.rb +130 -0
- data/lib/ferret/field_infos.rb +44 -0
- data/lib/ferret/index.rb +786 -0
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/setup.rb +1555 -0
- data/test/test_all.rb +5 -0
- data/test/test_helper.rb +24 -0
- data/test/threading/number_to_spoken.rb +132 -0
- data/test/threading/thread_safety_index_test.rb +79 -0
- data/test/threading/thread_safety_read_write_test.rb +76 -0
- data/test/threading/thread_safety_test.rb +133 -0
- data/test/unit/analysis/tc_analyzer.rb +548 -0
- data/test/unit/analysis/tc_token_stream.rb +646 -0
- data/test/unit/index/tc_index.rb +762 -0
- data/test/unit/index/tc_index_reader.rb +699 -0
- data/test/unit/index/tc_index_writer.rb +437 -0
- data/test/unit/index/th_doc.rb +315 -0
- data/test/unit/largefile/tc_largefile.rb +46 -0
- data/test/unit/query_parser/tc_query_parser.rb +238 -0
- data/test/unit/search/tc_filter.rb +135 -0
- data/test/unit/search/tc_fuzzy_query.rb +147 -0
- data/test/unit/search/tc_index_searcher.rb +61 -0
- data/test/unit/search/tc_multi_searcher.rb +128 -0
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tc_search_and_sort.rb +179 -0
- data/test/unit/search/tc_sort.rb +49 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +190 -0
- data/test/unit/search/tm_searcher.rb +384 -0
- data/test/unit/store/tc_fs_store.rb +77 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +34 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +2 -0
- data/test/unit/ts_index.rb +2 -0
- data/test/unit/ts_largefile.rb +4 -0
- data/test/unit/ts_query_parser.rb +2 -0
- data/test/unit/ts_search.rb +2 -0
- data/test/unit/ts_store.rb +2 -0
- data/test/unit/ts_utils.rb +2 -0
- data/test/unit/utils/tc_bit_vector.rb +295 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +285 -0
@@ -0,0 +1,437 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
class IndexWriterTest < Test::Unit::TestCase
|
5
|
+
include Ferret::Index
|
6
|
+
include Ferret::Analysis
|
7
|
+
|
8
|
+
# Per-test fixture: builds a fresh in-memory directory and initialises an
# index in it from a default (empty) FieldInfos, so each test starts from a
# clean store kept in @dir.
def setup
  @dir = Ferret::Store::RAMDirectory.new
  FieldInfos.new.create_index(@dir)
end
|
13
|
+
|
14
|
+
# Per-test cleanup: closes the directory that setup stored in @dir.
def teardown
  @dir.close
end
|
17
|
+
|
18
|
+
# Checks the lock and file state around the lifetime of an IndexWriter:
# * before a writer is opened, neither the write lock nor the commit lock
#   is held;
# * while the writer is open, the "segments" file exists and the write
#   lock is held;
# * after the writer is closed, "segments" still exists and both locks
#   are free again.
def test_initialize
  write_lock = @dir.make_lock(IndexWriter::WRITE_LOCK_NAME)
  commit_lock = @dir.make_lock(IndexWriter::COMMIT_LOCK_NAME)
  both_locks = [write_lock, commit_lock]

  # No writer yet, so nothing should be locked.
  both_locks.each { |lock| assert(!lock.locked?) }

  writer = IndexWriter.new(:dir => @dir, :create => true)
  assert(@dir.exists?("segments"))
  assert(write_lock.locked?)

  writer.close
  assert(@dir.exists?("segments"))
  both_locks.each { |lock| assert(!lock.locked?) }
end
|
31
|
+
|
32
|
+
# Adds three documents of different shapes through IndexWriter#<< (a Hash
# of fields, an Array of strings and a plain String) and verifies that
# doc_count grows by one after each addition.
def test_add_document
  writer = IndexWriter.new(:dir => @dir,
                           :analyzer => StandardAnalyzer.new,
                           :create => true)

  documents = [
    {:title => "first doc", :content => ["contents of", "first doc"]},
    ["contents of", "second doc"],
    "contents of third doc"
  ]

  # The expected count after each insert is the 1-based position of the doc.
  documents.each_with_index do |document, position|
    writer << document
    assert_equal(position + 1, writer.doc_count)
  end

  writer.close
end
|
44
|
+
|
45
|
+
# Adds 100 documents produced by random_doc (a helper defined elsewhere in
# this class) to a writer configured with merge_factor = 3 and
# max_buffered_docs = 3, then checks that doc_count reports all 100.
# NOTE(review): the small settings presumably force frequent flushes and
# merges during the run -- confirm against the IndexWriter documentation.
def test_add_documents_fuzzy
  writer = IndexWriter.new(:dir => @dir,
                           :analyzer => StandardAnalyzer.new)
  writer.merge_factor = 3
  writer.max_buffered_docs = 3

  # add 100 documents
  100.times { writer.add_document(random_doc) }

  assert_equal(100, writer.doc_count)
  writer.close
end
|
59
|
+
|
60
|
+
private
|
61
|
+
|
62
|
+
WORDS = [
|
63
|
+
"desirous", "hollowness's", "camp's", "Senegal", "broadcaster's",
|
64
|
+
"pecking", "Provence", "paternalism", "premonition", "Dumbo's",
|
65
|
+
"Darlene's", "Elbert's", "substrate", "Camille", "Menkalinan", "Cooper",
|
66
|
+
"decamps", "abatement's", "bindings", "scrubby", "subset", "ancestor's",
|
67
|
+
"pelagic", "abscissa", "loofah's", "gleans", "boudoir", "disappointingly",
|
68
|
+
"guardianship's", "settlers", "Mylar", "timetable's", "parabolic",
|
69
|
+
"madams", "bootlegger's", "monotonically", "gage", "Karyn's", "deposed",
|
70
|
+
"boozy", "swordfish's", "Chevron", "Victrola", "Tameka", "impels",
|
71
|
+
"carrels", "salami's", "celibate", "resistance's", "duration",
|
72
|
+
"abscissae", "Kilroy's", "corrosive", "flight's", "flapper", "scare",
|
73
|
+
"peppiest", "Pygmies", "Menzies", "wrist's", "enumerable", "housecoats",
|
74
|
+
"Khwarizmi's", "stampeding", "hungering", "steeping", "Yemenis",
|
75
|
+
"entangles", "solver", "mishapping", "Rand's", "ninety", "Boris",
|
76
|
+
"impedimenta", "predators", "ridge", "wretchedness's", "crapping", "Head",
|
77
|
+
"Edwards", "Claude's", "geodesics", "verities", "botch", "Short's",
|
78
|
+
"vellum's", "coruscates", "hydrogenates", "Haas's", "deceitfulness",
|
79
|
+
"cohort's", "Cepheus", "totes", "Cortez's", "napalm", "fruitcake",
|
80
|
+
"coordinated", "Coulomb", "desperation", "behoves", "contractor's",
|
81
|
+
"vacationed", "Wanamaker's", "leotard", "filtrated", "cringes", "Lugosi",
|
82
|
+
"sheath's", "orb", "jawed", "Isidro", "geophysics", "persons", "Asians",
|
83
|
+
"booze's", "eight's", "backslappers", "hankered", "dos", "helpings",
|
84
|
+
"tough", "interlarding", "gouger", "inflect", "Juneau's", "hay's",
|
85
|
+
"sardining", "spays", "Brandi", "depressant", "space", "assess",
|
86
|
+
"reappearance's", "Eli's", "Cote", "Enoch", "chants", "ruffianing",
|
87
|
+
"moralised", "unsuccessfully", "or", "Maryland's", "mildest", "unsafer",
|
88
|
+
"dutiful", "Pribilof", "teas", "vagued", "microbiologists", "hedgerow",
|
89
|
+
"speller's", "conservators", "catharsis", "drawbacks", "whooshed",
|
90
|
+
"unlawful", "revolve", "craftsmanship", "destabilise", "Margarito",
|
91
|
+
"Asgard's", "spawn's", "Annabel's", "canonicals", "buttermilk",
|
92
|
+
"exaltation's", "pothole", "reprints", "approximately", "homage",
|
93
|
+
"Wassermann's", "Atlantic's", "exacerbated", "Huerta", "keypunching",
|
94
|
+
"engagements", "dilate", "ponchos", "Helvetius", "Krakatoa", "basket's",
|
95
|
+
"stepmother", "schlock's", "drippings", "cardiology's", "northwesterly",
|
96
|
+
"cruddier", "poesies", "rustproof", "climb", "miscalled", "Belgians",
|
97
|
+
"Iago", "brownout", "nurseries", "hooliganism's", "concourse's",
|
98
|
+
"advocate", "sunrise's", "hyper", "octopus's", "erecting",
|
99
|
+
"counterattacking", "redesign", "studies", "nitrating", "milestone",
|
100
|
+
"bawls", "Nereid", "inferring", "Ontario's", "annexed", "treasury",
|
101
|
+
"cosmogony's", "scandalised", "shindig's", "detention's",
|
102
|
+
"Lollobrigida's", "eradicating", "magpie", "supertankers", "Adventist's",
|
103
|
+
"dozes", "Artaxerxes", "accumulate", "dankest", "telephony", "flows",
|
104
|
+
"Srivijaya's", "fourteen's", "antonym", "rancid", "briefing's",
|
105
|
+
"theologian", "Jacuzzi", "gracing", "chameleon's", "Brittney's",
|
106
|
+
"Pullmans", "Robitussin's", "jitterier", "mayonnaise's", "fort",
|
107
|
+
"closeouts", "amatory", "Drew's", "cockfight", "pyre", "Laura's",
|
108
|
+
"Bradley's", "obstructionists", "interventions", "tenderness's",
|
109
|
+
"loadstones", "castigation's", "undercut", "volubly", "meditated",
|
110
|
+
"Ypsilanti", "Jannie's", "tams", "drummer's", "inaugurations", "mawing",
|
111
|
+
"Anglophile", "Sherpa", "footholds", "Gonzalo", "removers",
|
112
|
+
"customisation", "procurement's", "allured", "grimaced", "captaining",
|
113
|
+
"liberates", "grandeur's", "Windsor", "screwdrivers", "Flynn's",
|
114
|
+
"extortionists", "carnivorous", "thinned", "panhandlers", "trust's",
|
115
|
+
"bemoaned", "untwisted", "cantors", "rectifies", "speculation",
|
116
|
+
"niacin's", "soppy", "condom", "halberd", "Leadbelly", "vocation's",
|
117
|
+
"tanners", "chanticleer", "secretariats", "Ecuador's", "suppurated",
|
118
|
+
"users", "slag's", "atrocity's", "pillar", "sleeveless", "bulldozers",
|
119
|
+
"turners", "hemline", "astounded", "rosaries", "Mallarmé", "crucifies",
|
120
|
+
"Maidenform", "contribution", "evolve", "chemicals", "uteri",
|
121
|
+
"expostulation", "roamers", "daiquiris", "arraignment", "ribs", "King's",
|
122
|
+
"Persepolis", "arsenic's", "blindfolds", "bloodsucker's", "restocks",
|
123
|
+
"falconry", "Olympia's", "Colosseum's", "vigils", "Louie's",
|
124
|
+
"unwillingly", "sealed", "potatoes", "Argentine", "audit's", "outworn",
|
125
|
+
"boggles", "likely", "alleging", "Tinkerbell", "redistribution's",
|
126
|
+
"Normandy", "Cortes", "porter's", "buntings", "cornucopias", "rosewoods",
|
127
|
+
"shelf's", "airdrops", "summits", "Rosalyn", "redecorating", "twirlers",
|
128
|
+
"monsters", "directed", "semiautomatics", "Foch", "Hobart", "mutilates",
|
129
|
+
"Wilma's", "ornamenting", "Clifford's", "pyromania", "Strasbourg",
|
130
|
+
"bleeders", "additions", "super", "effortlessly", "piecing", "vacations",
|
131
|
+
"gybes", "warranted", "Ting", "her", "histrionic", "marshaled", "spore's",
|
132
|
+
"villainy's", "brat", "confusion", "amphitheatre's", "adjourns",
|
133
|
+
"guzzled", "Visayans", "rogue's", "morsels", "candlestick", "flaks",
|
134
|
+
"Waterbury", "pulp's", "endorser's", "postdoc", "coffining", "swallowing",
|
135
|
+
"Wrangell", "Marcie's", "Marley", "untapped", "fear's", "Kant",
|
136
|
+
"pursuit's", "normally", "jackals", "orals", "Paramaribo's", "Marilyn's",
|
137
|
+
"Diem's", "narrower", "medicinally", "chickweed's", "pretentiousness",
|
138
|
+
"Lardner", "baritone's", "purrs", "Pam's", "pestles", "Philip's",
|
139
|
+
"Titania", "eccentrics", "Albion's", "greed's", "raggediest",
|
140
|
+
"importations", "Truman", "incentives", "typified", "incurred",
|
141
|
+
"bandstands", "Minnie's", "pleasant", "Sandy's", "perplexities",
|
142
|
+
"crease's", "obliques", "backstop", "Nair's", "perusing", "Quixote's",
|
143
|
+
"sicknesses", "vapour's", "butte", "lariats", "disfavours", "McGuffey",
|
144
|
+
"paediatric", "filtered", "whiff's", "gunboats", "devolved",
|
145
|
+
"extravaganza's", "organism", "giggling", "citadel's", "counterbalances",
|
146
|
+
"executrixes", "Cathay", "marshmallow's", "iniquitous", "Katmai", "Siva",
|
147
|
+
"welled", "impertinence's", "plunger", "rice", "forgers", "Larousse",
|
148
|
+
"pollution's", "medium", "residue's", "rumbas", "Odis", "arrogant",
|
149
|
+
"Jasper's", "panged", "doubted", "vistaing", "decibel's", "modulus's",
|
150
|
+
"chickpea's", "mugger's", "potentates", "sequesters", "academy's",
|
151
|
+
"Turk's", "pharmacology's", "defogger", "clomp", "soulless", "elastic",
|
152
|
+
"la's", "shards", "unfortunate", "counterclaim's", "objections", "towel",
|
153
|
+
"converged", "z", "ionisation", "stirrups", "antiquarians", "constructor",
|
154
|
+
"virtuosity's", "Göteborg", "centigramme's", "translators", "dalliance's",
|
155
|
+
"us", "bullfight", "drawer's", "nonconformist", "handcrafts", "Magritte",
|
156
|
+
"tulle", "plant's", "routine", "colour's", "latency's", "repertoire's",
|
157
|
+
"photocopies", "catalyse", "ashrams", "lagging", "flapjack's",
|
158
|
+
"ayatollahs", "decentest", "pitted", "conformity", "jack", "batsman",
|
159
|
+
"electrifies", "Unitarians", "obtain", "medicates", "tumour's",
|
160
|
+
"nutritionally", "haystack", "bustles", "slut", "satirising", "birettas",
|
161
|
+
"starring", "Kubrick's", "flogs", "chequering", "Menkalinan's",
|
162
|
+
"Barbados's", "Bioko", "swinish", "hades", "perjured", "timing's",
|
163
|
+
"cocaine", "ejecting", "rationalises", "dilettante's", "umping",
|
164
|
+
"capsized", "frogmen", "matt", "prostituting", "bola's", "devolution's",
|
165
|
+
"poxing", "Maritza's", "snob's", "scoped", "Costco", "feral", "sirocco",
|
166
|
+
"rebating", "truculence", "junkier", "nabs", "elicit", "allegiance",
|
167
|
+
"care", "arteriosclerosis's", "nonproliferation's", "doxologies",
|
168
|
+
"disconsolate", "bodega", "designers", "Rembrandt", "apostasies",
|
169
|
+
"garrulousness", "Hertzsprung's", "hayseeds", "noncooperation's",
|
170
|
+
"resentment", "cuticles", "sandboxes", "gimmicks", "magnolia",
|
171
|
+
"invalidity's", "pulverised", "Tinkerbell's", "hypoglycemics",
|
172
|
+
"gunboat's", "workbench's", "fleetingly's", "sportsman's", "trots",
|
173
|
+
"decomposes", "discrepancies", "owls", "obscener", "organic", "stoutness",
|
174
|
+
"councillor's", "Philippine's", "Aline", "coarsening", "suffocated",
|
175
|
+
"infighting's", "peculiarity", "roof's", "premier", "sucked", "churl",
|
176
|
+
"remounts", "intends", "wiles", "unfold", "unperturbed", "wainscotings",
|
177
|
+
"restfuller", "ashtray's", "wader's", "decanters", "gild", "tandems",
|
178
|
+
"spooked", "galling", "annuity's", "opacity", "clamour's", "flaccid",
|
179
|
+
"caroming", "savvying", "mammalian's", "toadstool's", "doohickey", "jibs",
|
180
|
+
"conquests", "dishes", "effusively", "distinctions", "curly", "Peckinpah",
|
181
|
+
"whining", "quasar", "sponge", "infrequent", "Novembers", "cowling",
|
182
|
+
"poem's", "muzzles", "Sufi", "authoritarians", "prompts", "Gavin's",
|
183
|
+
"morphology's", "shenanigan", "narrated", "rapprochement", "Heine",
|
184
|
+
"propane's", "addition", "prefect's", "pining", "dwindles",
|
185
|
+
"compulsiveness's", "objectors", "trudging", "segregates", "language",
|
186
|
+
"enthralled", "explosiveness", "toeing", "drainers", "Merrimack's",
|
187
|
+
"smarten", "bigwig's", "embroiders", "Medicaids", "grammar's", "behest's",
|
188
|
+
"chiseled", "equalled", "factual", "Casablanca's", "dams",
|
189
|
+
"disillusioned", "turtleneck", "Baden", "provinces", "bushwhacked", "fey",
|
190
|
+
"Yangtze", "loan's", "decent", "strobe", "challenger's", "hometown",
|
191
|
+
"Neal", "Ernestine's", "magnetises", "minute", "patrol", "Starbucks",
|
192
|
+
"Bernstein", "signal", "interplanetary", "tweak", "archdeacon",
|
193
|
+
"untoward", "transducer", "azaleas", "levied", "worlds", "talks",
|
194
|
+
"Tancred", "hairsplitting's", "edibility's", "confab", "rosetted",
|
195
|
+
"Spanish", "Americanisation", "Charley", "realm's", "incongruities",
|
196
|
+
"chinstraps", "dollhouses", "binocular", "popgun", "physiotherapy's",
|
197
|
+
"knave's", "angelically", "heartbreaking", "clarions", "bespeaks",
|
198
|
+
"pivotal", "Zosma", "ungrammatical", "dilution", "tidily", "Dejesus's",
|
199
|
+
"taller", "pennyweight's", "freshman", "Jamestown", "chiefer", "amen",
|
200
|
+
"attiring", "appurtenance's", "opiates", "mottoes", "towellings", "ashen",
|
201
|
+
"font's", "spoors", "pupil", "groom's", "skimpy", "achieves",
|
202
|
+
"intolerance's", "ardour's", "exorcist", "bottoming", "snag's",
|
203
|
+
"Frenches", "hysteric's", "ladyfinger's", "differences", "seed",
|
204
|
+
"clubfoot's", "glades", "Elton's", "jargon", "Waldo", "grinning",
|
205
|
+
"coherence's", "winos", "turnround", "appended", "Ethelred's", "delete",
|
206
|
+
"steadfastness's", "miss", "thermoplastic", "depraves", "unctuous",
|
207
|
+
"reanimates", "transfusing", "protects", "Babbage's", "foists", "inn",
|
208
|
+
"etched", "sanctimoniously", "idling", "timepiece", "holistic",
|
209
|
+
"waterside", "ulna's", "swindled", "employables", "zebra", "nieces",
|
210
|
+
"pertained", "usages", "vamp's", "Larry's", "cooler's", "holographs",
|
211
|
+
"clewing", "stubborning", "peaked", "underfeeds", "marshmallows",
|
212
|
+
"agreeable", "beards", "Slovenia's", "nitroglycerin", "palls", "impurer",
|
213
|
+
"armours", "stomachaches", "notification's", "Dixieland's", "crozier's",
|
214
|
+
"neurotic", "kudos", "Tania's", "M", "soundtrack's", "territory's",
|
215
|
+
"sped", "house's", "divisibility", "ingress's", "pummelled", "Isabel",
|
216
|
+
"Dewitt", "seemly", "hutched", "calliope", "lengthwise", "flubs",
|
217
|
+
"Moldavia's", "Mercia", "McBride's", "Lenten", "pulverise", "football",
|
218
|
+
"oligarchy", "Max", "scribbler", "acclimatize", "brainwashes",
|
219
|
+
"apprenticed", "benevolences", "two", "Wodehouse", "crew's", "massacre",
|
220
|
+
"proportionals", "Jewishness's", "instep's", "emissary", "folder",
|
221
|
+
"nonentity's", "convinced", "caption", "kangarooed", "dogie",
|
222
|
+
"vagabonding", "auction's", "appraising", "antimony", "part's",
|
223
|
+
"longitude's", "inconsiderateness's", "pawning", "serer", "solos",
|
224
|
+
"histories", "mushy", "parturition", "munched", "oregano", "inanest",
|
225
|
+
"dryness", "kitchenware", "unexpected", "covens", "cheesecakes",
|
226
|
+
"stakeout's", "Pulaski's", "Yoknapatawpha's", "pinhead", "drifted",
|
227
|
+
"guzzler's", "funking", "sou'wester", "oesophagus's", "highbrow",
|
228
|
+
"contralto", "meningitis", "Mazzini", "raggedest", "vaginas", "misfiring",
|
229
|
+
"margaritas", "wedder", "pointed", "slicked", "garlanded", "comeuppances",
|
230
|
+
"vassals", "Sui", "Concord", "bozos", "Garry's", "Maribel's", "epileptic",
|
231
|
+
"Jehoshaphat's", "revolutionary's", "kneecaps", "songbird", "actively",
|
232
|
+
"Meredith", "toddler", "distrusting", "fuchsias", "perusal", "instills",
|
233
|
+
"deathbed", "sunspot's", "spatula's", "Muscovy", "humaniser", "Keats",
|
234
|
+
"regrets", "deflect", "theories", "nonpluses", "populating", "leniency's",
|
235
|
+
"penicillin's", "gaol's", "borough", "moose's", "dogmata",
|
236
|
+
"transcendentally", "supposition's", "nursed", "Gagarin's", "honest",
|
237
|
+
"Chandrasekhar's", "mudslinger's", "parable", "bonged", "Wyeth's",
|
238
|
+
"Ochoa's", "Grenoble", "steamy", "halter's", "rotisserie's", "pagoda's",
|
239
|
+
"wallaby's", "Yank", "pretzel", "rapist's", "estrange", "hectored",
|
240
|
+
"Puebla's", "conniver", "creditor's", "dole's", "Fotomat", "patents",
|
241
|
+
"heckling", "thickener", "etches", "yogi", "hemstitched", "obverses",
|
242
|
+
"Lipizzaner", "divert", "Strong's", "sagest", "Alabama", "He", "Carrie's",
|
243
|
+
"obligation's", "verity's", "outed", "Rhee", "bluffed", "codas",
|
244
|
+
"crèche's", "unpalatable", "dilettanti", "vestment", "purse's",
|
245
|
+
"inflammation's", "bookmarked", "doing's", "whinnying", "impersonators",
|
246
|
+
"Theiler", "scurried", "resistor", "southerners", "Anacreon",
|
247
|
+
"reconstruction's", "footage", "trespassing", "Kafka", "bottling",
|
248
|
+
"stays", "Gretzky", "overburdening", "princesses", "weathercock's",
|
249
|
+
"atolls", "cheerier", "packet", "surrenders", "teacup", "Sabik's",
|
250
|
+
"undecidable", "lollygagged", "pawl's", "anaesthesiology", "sublimely",
|
251
|
+
"contortionists", "motorcades", "Maureen", "lamasery", "yourselves",
|
252
|
+
"Creighton", "poliomyelitis's", "civil", "outmanoeuvre", "lauded",
|
253
|
+
"closeness", "Humboldt's", "pretzels", "ungrudging", "blackguard's",
|
254
|
+
"sickles", "typo", "narcotics", "linesman", "psychotics", "pictured",
|
255
|
+
"deviltry", "Yahtzee", "Lovelace's", "cerebra", "airiness's", "bewitch",
|
256
|
+
"how", "motherland's", "crate's", "Keenan's", "turnstile's",
|
257
|
+
"pedometer's", "carted", "slipping", "fallow", "Canadian", "ladybird's",
|
258
|
+
"thump", "shopper's", "enters", "scowls", "nematode", "focused",
|
259
|
+
"Riley's", "grainiest", "novas", "snuffled", "leftovers", "deify",
|
260
|
+
"Samoan", "pruning", "contenting", "Khachaturian's", "triads",
|
261
|
+
"genealogies", "psalmist", "shaming", "appropriated", "ignominies",
|
262
|
+
"Beadle's", "MHz", "peerages", "facile", "Seoul", "Janna's", "jig's",
|
263
|
+
"mousiness's", "funnier", "delimiter", "watermark", "sheik's", "Reasoner",
|
264
|
+
"ipecac's", "curdles", "wronged", "Segovia's", "solders", "Dunne's",
|
265
|
+
"contractor", "awards", "hostels", "pinkie's", "Herzl", "misplace",
|
266
|
+
"shuttle", "innovative", "vestries", "cosmoses", "trikes", "Casandra's",
|
267
|
+
"hokier", "carouser's", "summerhouses", "renascence", "decomposed",
|
268
|
+
"Balzac's", "outlast", "shod", "squalling", "smugging", "weighing",
|
269
|
+
"omega's", "selects", "fleetingly", "Finland", "petted", "disrespects",
|
270
|
+
"fetter", "confound", "brads", "Bosnia's", "preposition's", "guy's",
|
271
|
+
"different", "tracts", "paediatrics's", "polygon", "eyetooth's", "Aesop",
|
272
|
+
"pentagons", "professions", "homeowner", "looter's", "intimidated",
|
273
|
+
"lustre's", "loneliness", "catnapped", "counties", "pailful",
|
274
|
+
"Christendom's", "Barents", "penis", "Mumford's", "Nigel", "éclairs",
|
275
|
+
"splats", "diabolical", "popularly", "quart", "abjected", "Rasalgethi",
|
276
|
+
"camel's", "inimical", "overweening", "distention's", "Advil", "casement",
|
277
|
+
"seamier", "avaricious", "sierra's", "caparison's", "moldered", "Cortez",
|
278
|
+
"handmaid's", "disappointment", "billowed", "overpopulated", "outsets",
|
279
|
+
"ray", "smoother", "overkill", "somber", "tiller's", "zigzag", "adviser",
|
280
|
+
"absorption's", "sturdily", "hairy", "bloodmobile", "investiture's",
|
281
|
+
"creature", "ripeness's", "Jonathon", "arborvitae's", "skulduggery",
|
282
|
+
"bog", "skeleton's", "Kit's", "Panamas", "Ashlee's", "jazzy", "snit",
|
283
|
+
"divisive", "caribous", "permuting", "frankest", "annotated", "oak's",
|
284
|
+
"meg's", "Gill", "burrito", "dormancy's", "offings", "Nike",
|
285
|
+
"outnumbered", "skater's", "Portugal", "deficit", "Cannon's", "pockmark",
|
286
|
+
"sediment's", "mailbox", "innuendoed", "retire", "wolfhound's",
|
287
|
+
"nicotine's", "brigade's", "mettle's", "softhearted", "hooey's",
|
288
|
+
"abdication", "Orval", "Jaime", "ship", "hyphenations", "sectarians",
|
289
|
+
"Alabaman", "tagging", "ultras", "schizoids", "medicines", "undersized",
|
290
|
+
"Gray", "maternity's", "bandaging", "scooping", "coercion's", "serapes",
|
291
|
+
"celebrate", "Listerine's", "throve", "crypt's", "nearsighted",
|
292
|
+
"metallurgists", "Delicious", "cotton's", "yoked", "cogitates",
|
293
|
+
"underage", "cigarette's", "hallways", "Cointreau", "ma'am", "spacing's",
|
294
|
+
"foresight", "parkway's", "Edwardian", "mediator", "Turner", "Derrida's",
|
295
|
+
"motorist's", "hobo", "equivalences", "sophism", "peeping", "telescoped",
|
296
|
+
"overproduce", "ductility", "Leblanc", "refractory", "passé", "decodes",
|
297
|
+
"womanising", "flax's", "pond's", "infrequency", "talkativeness's",
|
298
|
+
"settlement's", "Prince", "bating", "multimillionaire", "Schultz",
|
299
|
+
"premiss", "quackery", "bathhouse", "Leno's", "Monday's", "Hung's",
|
300
|
+
"undaunted", "bewaring", "tension's", "Chile's", "Rostand's", "platoons",
|
301
|
+
"rodeo's", "Dionne", "Dyson's", "gingivitis's", "fewer",
|
302
|
+
"electromagnetism's", "scrubbier", "ensconced", "wretcheder", "mica's",
|
303
|
+
"expectorant", "snapper's", "chastised", "habitation", "spry", "bathing",
|
304
|
+
"stealth's", "champagnes", "baleful", "fencing's", "threaded", "codicils",
|
305
|
+
"disgraced", "redcaps", "addends", "Olivier", "clasped", "Gwendolyn",
|
306
|
+
"foment", "angularity's", "strenuously", "gorilla", "misbehaved",
|
307
|
+
"surplus's", "newsier", "positioned", "bloodmobiles", "circumstantials",
|
308
|
+
"person's", "varicose", "Calliope", "plethora", "Olmsted",
|
309
|
+
"reconciliation", "Brendan's", "beset", "totters", "sailors",
|
310
|
+
"parliamentarians", "Whitaker", "hilts", "pummelling", "academician's",
|
311
|
+
"ruse", "discreeter", "appetisingly", "perfections", "anus", "overrode",
|
312
|
+
"pedantry's", "possessed", "germs", "unscrews", "expired",
|
313
|
+
"semitrailer's", "Cupid's", "nonsmoker", "Marathon", "secs", "Hopkins",
|
314
|
+
"freeing", "libelled", "furious", "staccatos", "electroencephalogram's",
|
315
|
+
"malingerer's", "impulses", "briars", "Tran", "hilltops", "sulks",
|
316
|
+
"quailed", "fads", "retrenches", "spouted", "outtake", "puncture's",
|
317
|
+
"rats", "kibitzed", "berets", "omnivorous", "flange", "Mons", "glints",
|
318
|
+
"mansards", "thou", "cuing", "suspected", "Kaiser's", "savvier", "skits",
|
319
|
+
"interdict's", "Booker", "Rubinstein", "Tm's", "crossing's", "dewlap",
|
320
|
+
"guarantor's", "edification's", "joyfullest", "crossed", "chowdering",
|
321
|
+
"sillier", "reloading", "commodity's", "bodkins", "conduced", "coughs",
|
322
|
+
"nucleus's", "sixtieth", "proverbially", "comprehensive", "ineluctably",
|
323
|
+
"patrolmen", "resuscitating", "carpetbag's", "Darrin's", "Yeager",
|
324
|
+
"Bataan's", "spoonsful", "proceeds", "wrongdoer", "Karroo", "heart",
|
325
|
+
"poison", "typifying", "endowment's", "aquanauts", "deaconesses",
|
326
|
+
"homosexuality", "Maxine", "haunching", "centred", "Peking's",
|
327
|
+
"toothiest", "growers", "firebombs", "throbs", "Downy", "contribution's",
|
328
|
+
"sago's", "Cole", "Knoxville", "leftmost", "Nell's", "Baffin", "barrings",
|
329
|
+
"contagions", "disencumbers", "countdown", "quintuple", "perihelion",
|
330
|
+
"creationism's", "actioning", "admiralty", "Mt's", "durability's",
|
331
|
+
"sewer's", "replicas", "oxide", "ripened", "Pisces's", "Cinerama's",
|
332
|
+
"catheters", "oppressive", "roosting", "foggiest", "properly", "Kareem",
|
333
|
+
"Ollie", "minuted", "vehicles", "eel", "remunerates", "swashbuckler's",
|
334
|
+
"remunerative", "sanguining", "Belem's", "forlornly", "rudders",
|
335
|
+
"officialdom", "countertenors", "Upton", "whoop", "animations", "arouses",
|
336
|
+
"millionths", "videocassette", "fledgling", "shake", "exterminated",
|
337
|
+
"Cain's", "trendiest", "wariest", "torpedoes", "airmails", "Cameron's",
|
338
|
+
"discord's", "spitefulness's", "thudded", "menaced", "takeovers",
|
339
|
+
"solicited", "wallpapers", "economic", "cache", "rechargeable", "gongs",
|
340
|
+
"droning", "exemption", "Alaskans", "toothed", "snifter", "Stephens",
|
341
|
+
"prejudge", "doctor's", "bobolinks", "rotates", "valuation's", "narrator",
|
342
|
+
"weaning", "uncle", "shelter", "destitution's", "Edgardo's", "gauge",
|
343
|
+
"Nice", "Adolf's", "rheumatics", "inheritances", "undesirables",
|
344
|
+
"Eileen's", "flyweight's", "scope", "possessiveness", "tipsily",
|
345
|
+
"effulgence", "rematch", "Baltic", "unsteadiest", "rodeos", "gloaming's",
|
346
|
+
"ringers", "randomised", "commissars", "destroyer's", "router",
|
347
|
+
"disengaging", "it's", "Albert", "rampantly", "varmint", "Adkins",
|
348
|
+
"chevron", "insomniac", "bobsledded", "masochist's", "chronometers",
|
349
|
+
"compaction", "Mauro", "sidled", "Highlander's", "snail's", "syllabifies",
|
350
|
+
"application's", "symmetrical", "blacking", "accent's", "sentimentalists",
|
351
|
+
"sonatas", "profanities", "sloping", "Araby", "percolate", "repeated",
|
352
|
+
"youthfulness's", "Loyola", "deliriously", "matriarch's", "tailors",
|
353
|
+
"rerouting", "hairpin", "dispersal", "endowment", "disquieting", "swat",
|
354
|
+
"neckerchieves", "wrinkles", "amoebas", "Darcy", "orthodontics's",
|
355
|
+
"milder", "sneezing", "prescience's", "pads", "wrought", "perspicuity's",
|
356
|
+
"materialist", "pull", "laundryman's", "lazily", "protractor's", "Vic",
|
357
|
+
"photocopier", "guardrooms", "cablecasting", "confirms", "excretions",
|
358
|
+
"combatant", "counterfeiters", "periwig", "genteelest", "router's",
|
359
|
+
"springy", "procreated", "syphon", "parent's", "bigwigs", "rebelled",
|
360
|
+
"milkmaids", "McGee's", "seaworthier", "Bellatrix's", "tenement",
|
361
|
+
"embryologists", "Vaselining", "burrow's", "tonnage's", "Petty's",
|
362
|
+
"chancels", "scouring", "mouser", "recompensed", "guarding", "editor",
|
363
|
+
"raster", "bourgeoisie's", "interpolating", "skinflint's", "transport",
|
364
|
+
"bullfinch", "needlessly", "withholds", "counterclockwise", "panicking",
|
365
|
+
"Ahriman", "flambeing", "contrary", "heartstrings", "whittled", "crib's",
|
366
|
+
"highlighter", "extroverted", "Martinique's", "racquets", "Maldivian",
|
367
|
+
"physiognomy", "Hammarskjold", "massage", "shingling", "neighbourhood",
|
368
|
+
"boobed", "vulture", "intercontinental", "cobblers", "peddlers",
|
369
|
+
"forthrightly", "germicide", "raindrop's", "fir's", "decaffeinates",
|
370
|
+
"wobblier", "abnegated", "cruiser's", "satiety", "trilled", "impending",
|
371
|
+
"gulf", "mountebank", "beltway", "reappointment", "cinematographer",
|
372
|
+
"pylon", "penthouses", "morally", "installs", "Walsh's", "drawstring",
|
373
|
+
"circus's", "Khayyam's", "Myrtle's", "ventrals", "category's",
|
374
|
+
"opportunistic", "grovelling", "warier", "upchuck", "hairdresser's",
|
375
|
+
"Montanans", "jobber", "dazzle", "encirclement's", "muffin's", "coronets",
|
376
|
+
"focus's", "footfall's", "subjunctives", "late", "pedagogued",
|
377
|
+
"dignitaries", "content", "blockbusters", "reminiscent", "mayor",
|
378
|
+
"specifier", "extinction", "nutshell's", "catbird's", "bundle",
|
379
|
+
"gracefulness", "exceed", "estranges", "chancy", "bankrupted", "Avery",
|
380
|
+
"Barnett", "succulence", "stacking", "ensnare", "truck", "embargo",
|
381
|
+
"persecutes", "translation's", "muskrat's", "illumines", "undercoat's",
|
382
|
+
"fleecier", "brick", "qualities", "imprecision", "reprisals", "discounts",
|
383
|
+
"harmonics", "Mann's", "terrorism", "interminable", "Santiago's",
|
384
|
+
"deepness", "tramples", "golder", "voyeurism's", "tent", "particle's",
|
385
|
+
"minuend", "waxwings", "knobby", "trustee", "funnily", "hotheadedness's",
|
386
|
+
"Kristin", "what", "bite", "murmur's", "pustule's", "weeknights",
|
387
|
+
"rocked", "athlete", "ventilates", "impresses", "daguerreotyping",
|
388
|
+
"Gross", "gambols", "villa", "maraud", "disapproval", "apostrophe's",
|
389
|
+
"sheaf", "noisemaker's", "autonomy's", "massing", "daemon's", "Thackeray",
|
390
|
+
"fermenting", "whammy", "philosophise", "empathy", "calamities",
|
391
|
+
"sunbathe", "Qom", "yahoo's", "coxcomb's", "move", "school's",
|
392
|
+
"rainmakers", "shipwreck", "potbelly's", "courageously", "current",
|
393
|
+
"Aleut", "treaties", "U", "always", "Bosch", "impregnating", "bud's",
|
394
|
+
"carat", "centrists", "acquaintance's", "convoy's", "chichis",
|
395
|
+
"restraint's", "Cosby", "factotums", "handshaking", "paragon's",
|
396
|
+
"mileages", "Tammie", "cartoonists", "lemmas", "lowliness's", "onion's",
|
397
|
+
"E's", "Bible", "Cranmer", "fob's", "minks", "overstocking", "Willamette",
|
398
|
+
"needle's", "scuppers", "Carborundum", "upwardly", "tallies", "aptitude",
|
399
|
+
"synod", "nasturtium's", "Pensacola", "snappish", "merino", "sups",
|
400
|
+
"fingerboard's", "prodigy's", "narcissism's", "substantial", "lug",
|
401
|
+
"establishing", "Vergil's", "patrimonies", "shorted", "forestation",
|
402
|
+
"undeniable", "Katmandu", "lamination", "trollop's", "odd", "stanza",
|
403
|
+
"paraplegic", "melanin", "Rico", "foreman", "stereotypes", "affinity's",
|
404
|
+
"cleansing", "sautéing", "epochs", "crooners", "manicured", "undisclosed",
|
405
|
+
"propel", "usage", "Alioth's", "Aurelia's", "peruse", "Vassar's",
|
406
|
+
"Demosthenes's", "Brazos", "supermarket", "scribbles", "Jekyll's",
|
407
|
+
"discomfort's", "mastiffs", "ballasting", "Figueroa", "turnstiles",
|
408
|
+
"convince", "Shelton's", "Gustavo", "shunting", "Fujitsu's", "fining's",
|
409
|
+
"hippos", "dam's", "expressionists", "peewee", "troop's"
|
410
|
+
]
|
411
|
+
# Element count of WORDS, computed once; random_word uses it as the
# exclusive upper bound when drawing a random index.
WORDS_SIZE = WORDS.length
|
412
|
+
|
413
|
+
# Returns one entry of WORDS, chosen by drawing a random index in
# 0...WORDS_SIZE from Kernel#rand.
def random_word
  index = rand(WORDS_SIZE)
  WORDS[index]
end
|
416
|
+
|
417
|
+
# Builds a String of between 1 and +max_len+ random words.
#
# Every word (including the first) is preceded by a single space, so the
# result always starts with " ".
#
# max_len:: upper bound on the number of words in the sentence.
#
# Returns a new String.
def random_sentence(max_len)
  word_count = rand(max_len) + 1
  text = ""
  word_count.times do
    text << " "
    text << random_word
  end
  text
end
|
422
|
+
|
423
|
+
# Builds a random document as a Hash keyed by Symbol field names.
#
# Between 1 and +max_fields+ fields are generated. Field names come from
# random_word, so the same name may be drawn twice; the later value then
# replaces the earlier one. A field with a single element holds a String,
# a field with several elements holds an Array of Strings.
#
# max_fields::   upper bound on the number of fields generated.
# max_elements:: upper bound on the number of sentences per field.
# max_len::      upper bound on the number of words per sentence.
#
# Returns the document Hash.
def random_doc(max_fields = 10, max_elements = 10, max_len = 100)
  doc = {}
  field_total = rand(max_fields) + 1
  field_total.times do
    name = random_word.intern
    count = rand(max_elements) + 1
    doc[name] =
      if count == 1
        random_sentence(max_len)
      else
        Array.new(count) { random_sentence(max_len) }
      end
  end
  doc
end
|
437
|
+
end
|
@@ -0,0 +1,315 @@
|
|
1
|
+
# Fixture data shared by the index and search tests: field definitions,
# hand-written documents and binary blobs, exposed both as module methods
# and as constants computed once when the module is loaded.
module IndexTestHelper
  include Ferret::Index
  include Ferret::Analysis
  include Ferret::Search

  # Builds a binary String of +size+ bytes whose values cycle through
  # 0, 1, ..., 255, 0, 1, ...
  #
  # size:: number of bytes to generate.
  #
  # Returns the packed String.
  def self.make_binary(size)
    Array.new(size) { |i| i % 256 }.pack("c*")
  end

  BINARY_DATA = make_binary(256)
  COMPRESSED_BINARY_DATA = make_binary(56)

  # Builds one document exercising a range of store/index/term-vector
  # settings, together with the FieldInfos that declares those fields.
  #
  # dir:: not used by this method; kept so existing callers keep working.
  #
  # Returns a two-element Array: [document Hash, FieldInfos].
  def self.prepare_document(dir)
    fis = FieldInfos.new
    fis.add_field(:text_field1, :term_vector => :no)
    fis.add_field(:text_field2)
    fis.add_field(:key_field, :index => :untokenized)
    fis.add_field(:unindexed_field, :index => :no)
    fis.add_field(:unstored_field1, :store => :no, :term_vector => :no)
    fis.add_field(:unstored_field2, :store => :no, :term_vector => :yes)
    fis.add_field(:compressed_field, :store => :compressed, :term_vector => :yes)
    fis.add_field(:binary_field, :index => :no, :term_vector => :no)
    fis.add_field(:compressed_binary_field, :store => :compressed,
                  :index => :no, :term_vector => :no)

    doc = {
      :text_field1 => "field one text",
      :text_field2 => "field field field two text",
      :key_field => "keyword",
      :unindexed_field => "unindexed field text",
      :unstored_field1 => "unstored field text one",
      :unstored_field2 => "unstored field text two",
      :compressed_field => "compressed text",
      :binary_field => BINARY_DATA,
      :compressed_binary_field => COMPRESSED_BINARY_DATA
    }

    [doc, fis]
  end

  # Returns an Array of small documents, each a Hash with the String keys
  # "name" and "colour".
  def self.prepare_documents
    [
      ["apple", "green"],
      ["apple", "red"],
      ["orange", "orange"],
      ["grape", "green"],
      ["grape", "purple"],
      ["mandarin", "orange"],
      ["peach", "orange"],
      ["apricot", "orange"]
    ].map { |name, colour| {"name" => name, "colour" => colour} }
  end

  # Returns an Array of book records, each a Hash with the String keys
  # "author", "title" and "year" (the year is a String, not an Integer).
  def self.prepare_book_list
    [
      ["P.H. Newby", "Something To Answer For", "1969"],
      ["Bernice Rubens", "The Elected Member", "1970"],
      ["V. S. Naipaul", "In a Free State", "1971"],
      ["John Berger", "G", "1972"],
      ["J. G. Farrell", "The Siege of Krishnapur", "1973"],
      ["Stanley Middleton", "Holiday", "1974"],
      ["Nadine Gordimer", "The Conservationist", "1974"],
      ["Ruth Prawer Jhabvala", "Heat and Dust", "1975"],
      ["David Storey", "Saville", "1976"],
      ["Paul Scott", "Staying On", "1977"],
      ["Iris Murdoch", "The Sea", "1978"],
      ["Penelope Fitzgerald", "Offshore", "1979"],
      ["William Golding", "Rites of Passage", "1980"],
      ["Salman Rushdie", "Midnight's Children", "1981"],
      ["Thomas Keneally", "Schindler's Ark", "1982"],
      ["J. M. Coetzee", "Life and Times of Michael K", "1983"],
      ["Anita Brookner", "Hotel du Lac", "1984"],
      ["Keri Hulme", "The Bone People", "1985"],
      ["Kingsley Amis", "The Old Devils", "1986"],
      ["Penelope Lively", "Moon Tiger", "1987"],
      ["Peter Carey", "Oscar and Lucinda", "1988"],
      ["Kazuo Ishiguro", "The Remains of the Day", "1989"],
      ["A. S. Byatt", "Possession", "1990"],
      ["Ben Okri", "The Famished Road", "1991"],
      ["Michael Ondaatje", "The English Patient", "1992"],
      ["Barry Unsworth", "Sacred Hunger", "1992"],
      ["Roddy Doyle", "Paddy Clarke Ha Ha Ha", "1993"],
      ["James Kelman", "How Late It Was, How Late", "1994"],
      ["Pat Barker", "The Ghost Road", "1995"],
      ["Graham Swift", "Last Orders", "1996"],
      ["Arundati Roy", "The God of Small Things", "1997"],
      ["Ian McEwan", "Amsterdam", "1998"],
      ["J. M. Coetzee", "Disgrace", "1999"],
      ["Margaret Atwood", "The Blind Assassin", "2000"],
      ["Peter Carey", "True History of the Kelly Gang", "2001"],
      ["Yann Martel", "The Life of Pi", "2002"],
      ["DBC Pierre", "Vernon God Little", "2003"]
    ].map do |author, title, year|
      {"author" => author, "title" => title, "year" => year}
    end
  end

  # Builds the FieldInfos declaring the fields used by INDEX_TEST_DOCS.
  #
  # Returns the FieldInfos object itself. The explicit final +fis+ means the
  # result no longer depends on what the last add_field call returns.
  def self.prepare_ir_test_fis
    fis = FieldInfos.new
    fis.add_field(:body)
    fis.add_field(:changing_field, :term_vector => :no)
    fis.add_field(:title, :index => :untokenized, :term_vector => :with_offsets)
    fis.add_field(:author, :term_vector => :with_positions)
    fis.add_field(:year, :index => :no, :term_vector => :no)
    fis.add_field(:text, :store => :no, :term_vector => :no)
    fis
  end

  INDEX_TEST_DOC_COUNT = 64

  # Builds the fixed document set stored in INDEX_TEST_DOCS.
  #
  # Documents 0..21 are written out by hand. Every later document, up to
  # index INDEX_TEST_DOC_COUNT - 1, has a single :text field holding the
  # word "skip " repeated as many times as the document's own index.
  #
  # Returns an Array of document Hashes keyed by Symbol.
  def self.prepare_ir_test_docs
    # Text shared by several fields; dup'ed on every use so that each
    # document owns its own String object.
    word_run = "word3 word4 word1 word2 word1 word3 word4 word1 " +
               "word3 word3"

    docs = [
      {:body => "Where is Wally", :changing_field => word_run.dup},            # 0
      {:body => "Some Random Sentence read"},                                  # 1
      {:body => "Some read Random Sentence read"},                             # 2
      {                                                                        # 3
        :title => "War And Peace",
        :body => word_run.dup,
        :author => "Leo Tolstoy",
        :year => "1865",
        :text => "more text which is not stored"
      },
      {:body => "Some Random Sentence"},                                       # 4
      {:body => "Here's Wally"},                                               # 5
      {:body => "Some Random Sentence read read read read"},                   # 6
      {:body => "Some Random Sentence"},                                       # 7
      {:body => "Some Random Sentence"},                                       # 8
      {                                                                        # 9
        :body => "read Some Random Sentence read this will be used after " +
                 "unfinished next position read"
      },
      {:body => "Some read Random Sentence", :changing_field => word_run.dup}, # 10
      {:body => "And here too. Well, maybe Not"},                              # 11
      {:body => "Some Random Sentence"},                                       # 12
      {:body => "Some Random Sentence"},                                       # 13
      {:body => "Some Random Sentence"},                                       # 14
      {:body => "Some Random Sentence"},                                       # 15
      {:body => "Some Random read read Sentence"},                             # 16
      {:body => "Some Random read Sentence", :changing_field => word_run.dup}, # 17
      {:body => "Wally Wally Wally"},                                          # 18
      {:body => "Some Random Sentence", :changing_field => word_run.dup},      # 19
      {                                                                        # 20
        :body => "Wally is where Wally usually likes to go. Wally Mart! Wally " +
                 "likes shopping there for Where's Wally books. Wally likes " +
                 "to read",
        :changing_field => word_run.dup
      },
      {                                                                        # 21
        :body => "Some Random Sentence read read read and more read read read",
        :changing_field => word_run.dup
      }
    ]

    22.upto(INDEX_TEST_DOC_COUNT - 1) do |i|
      docs[i] = {:text => "skip " * i}
    end
    docs
  end

  INDEX_TEST_DOCS = prepare_ir_test_docs
  INDEX_TEST_FIS = prepare_ir_test_fis

  # Builds the documents stored in SEARCH_TEST_DOCS.
  #
  # Each row supplies the :date, :category and :field values of one
  # Ferret::Document. The constructor receives the row's 1-based position
  # (presumably used as the document boost -- confirm against
  # Ferret::Document).
  #
  # Returns an Array of Ferret::Document objects in row order.
  def self.prepare_search_docs
    rows = [
      ["20050930", "cat1/",             "word1"],
      ["20051001", "cat1/sub1",         "word1 word2 the quick brown fox"],
      ["20051002", "cat1/sub1/subsub1", "word1 word3"],
      ["20051003", "cat1/sub2",         "word1 word3"],
      ["20051004", "cat1/sub2/subsub2", "word1 word2"],
      ["20051005", "cat2/sub1",         "word1"],
      ["20051006", "cat2/sub1",         "word1 word3"],
      ["20051007", "cat2/sub1",         "word1"],
      ["20051008", "cat2/sub1",         "word1 word2 word3 the fast brown fox"],
      ["20051009", "cat3/sub1",         "word1"],
      ["20051010", "cat3/sub1",         "word1"],
      ["20051011", "cat3/sub1",         "word1 word3 the quick red fox"],
      ["20051012", "cat3/sub1",         "word1"],
      ["20051013", "cat1/sub2",         "word1"],
      ["20051014", "cat1/sub1",         "word1 word3 the quick hairy fox"],
      ["20051015", "cat1/sub2/subsub1", "word1"],
      ["20051016", "cat1/sub1/subsub2",
       "word1 the quick fox is brown and hairy and a little red"],
      ["20051017", "cat1/",
       "word1 the brown fox is quick and red"]
    ]

    ordinal = 0
    rows.map do |date, category, field|
      ordinal += 1
      doc = Ferret::Document.new(ordinal)
      doc[:date] = date
      doc[:category] = category
      doc[:field] = field
      doc
    end
  end

  SEARCH_TEST_DOCS = prepare_search_docs
end
|