jk-ferret 0.11.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +24 -0
- data/MIT-LICENSE +20 -0
- data/README +90 -0
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +443 -0
- data/TODO +109 -0
- data/TUTORIAL +231 -0
- data/bin/ferret-browser +79 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/STEMMER_api.c +66 -0
- data/ext/STEMMER_libstemmer.c +93 -0
- data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
- data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
- data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/STEMMER_stem_UTF_8_german.c +509 -0
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/STEMMER_utilities.c +478 -0
- data/ext/analysis.c +1710 -0
- data/ext/analysis.h +266 -0
- data/ext/api.h +26 -0
- data/ext/array.c +125 -0
- data/ext/array.h +62 -0
- data/ext/bitvector.c +96 -0
- data/ext/bitvector.h +594 -0
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +384 -0
- data/ext/config.h +52 -0
- data/ext/document.c +159 -0
- data/ext/document.h +63 -0
- data/ext/except.c +102 -0
- data/ext/except.h +176 -0
- data/ext/extconf.rb +15 -0
- data/ext/ferret.c +416 -0
- data/ext/ferret.h +94 -0
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +157 -0
- data/ext/fs_store.c +493 -0
- data/ext/global.c +458 -0
- data/ext/global.h +302 -0
- data/ext/hash.c +524 -0
- data/ext/hash.h +515 -0
- data/ext/hashset.c +192 -0
- data/ext/hashset.h +215 -0
- data/ext/header.h +58 -0
- data/ext/helper.c +63 -0
- data/ext/helper.h +21 -0
- data/ext/index.c +6804 -0
- data/ext/index.h +935 -0
- data/ext/internal.h +1019 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +68 -0
- data/ext/libstemmer.h +79 -0
- data/ext/mempool.c +88 -0
- data/ext/mempool.h +43 -0
- data/ext/modules.h +190 -0
- data/ext/multimapper.c +351 -0
- data/ext/multimapper.h +60 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +973 -0
- data/ext/priorityqueue.c +149 -0
- data/ext/priorityqueue.h +155 -0
- data/ext/q_boolean.c +1621 -0
- data/ext/q_const_score.c +162 -0
- data/ext/q_filtered_query.c +212 -0
- data/ext/q_fuzzy.c +280 -0
- data/ext/q_match_all.c +149 -0
- data/ext/q_multi_term.c +673 -0
- data/ext/q_parser.c +3103 -0
- data/ext/q_phrase.c +1206 -0
- data/ext/q_prefix.c +98 -0
- data/ext/q_range.c +682 -0
- data/ext/q_span.c +2390 -0
- data/ext/q_term.c +337 -0
- data/ext/q_wildcard.c +167 -0
- data/ext/r_analysis.c +2626 -0
- data/ext/r_index.c +3468 -0
- data/ext/r_qparser.c +635 -0
- data/ext/r_search.c +4490 -0
- data/ext/r_store.c +513 -0
- data/ext/r_utils.c +1131 -0
- data/ext/ram_store.c +476 -0
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +1864 -0
- data/ext/search.h +953 -0
- data/ext/similarity.c +151 -0
- data/ext/similarity.h +89 -0
- data/ext/sort.c +786 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +410 -0
- data/ext/store.c +698 -0
- data/ext/store.h +799 -0
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +73 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +62 -0
- data/lib/ferret.rb +30 -0
- data/lib/ferret/browser.rb +246 -0
- data/lib/ferret/browser/s/global.js +192 -0
- data/lib/ferret/browser/s/style.css +148 -0
- data/lib/ferret/browser/views/document/list.rhtml +49 -0
- data/lib/ferret/browser/views/document/show.rhtml +27 -0
- data/lib/ferret/browser/views/error/index.rhtml +7 -0
- data/lib/ferret/browser/views/help/index.rhtml +8 -0
- data/lib/ferret/browser/views/home/index.rhtml +29 -0
- data/lib/ferret/browser/views/layout.rhtml +22 -0
- data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
- data/lib/ferret/browser/views/term/index.rhtml +199 -0
- data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
- data/lib/ferret/browser/webrick.rb +14 -0
- data/lib/ferret/document.rb +130 -0
- data/lib/ferret/field_infos.rb +44 -0
- data/lib/ferret/field_symbol.rb +87 -0
- data/lib/ferret/index.rb +973 -0
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret/version.rb +3 -0
- data/setup.rb +1555 -0
- data/test/long_running/largefile/tc_largefile.rb +46 -0
- data/test/test_all.rb +5 -0
- data/test/test_helper.rb +29 -0
- data/test/test_installed.rb +1 -0
- data/test/threading/number_to_spoken.rb +132 -0
- data/test/threading/thread_safety_index_test.rb +88 -0
- data/test/threading/thread_safety_read_write_test.rb +73 -0
- data/test/threading/thread_safety_test.rb +133 -0
- data/test/unit/analysis/tc_analyzer.rb +550 -0
- data/test/unit/analysis/tc_token_stream.rb +653 -0
- data/test/unit/index/tc_index.rb +867 -0
- data/test/unit/index/tc_index_reader.rb +699 -0
- data/test/unit/index/tc_index_writer.rb +447 -0
- data/test/unit/index/th_doc.rb +332 -0
- data/test/unit/query_parser/tc_query_parser.rb +238 -0
- data/test/unit/search/tc_filter.rb +156 -0
- data/test/unit/search/tc_fuzzy_query.rb +147 -0
- data/test/unit/search/tc_index_searcher.rb +67 -0
- data/test/unit/search/tc_multi_searcher.rb +128 -0
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tc_search_and_sort.rb +179 -0
- data/test/unit/search/tc_sort.rb +49 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +190 -0
- data/test/unit/search/tm_searcher.rb +436 -0
- data/test/unit/store/tc_fs_store.rb +115 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +34 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +2 -0
- data/test/unit/ts_index.rb +2 -0
- data/test/unit/ts_largefile.rb +4 -0
- data/test/unit/ts_query_parser.rb +2 -0
- data/test/unit/ts_search.rb +2 -0
- data/test/unit/ts_store.rb +2 -0
- data/test/unit/ts_utils.rb +2 -0
- data/test/unit/utils/tc_bit_vector.rb +295 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +319 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
# Long-running test that builds a large on-disk index (RECORDS documents of
# roughly RECORD_SIZE repetitions of a short token each) and checks that the
# index can still be counted, updated by key and read back.
class SampleLargeTest < Test::Unit::TestCase
  include Ferret::Index
  include Ferret::Search
  include Ferret::Store
  include Ferret::Utils

  INDEX_DIR = File.dirname(__FILE__) + "/../../temp/largefile"
  RECORDS = 750
  # NOTE: 10e5 is the Float 1_000_000.0 (not 10**5). It is used as a String
  # repetition count in create_index!.
  RECORD_SIZE = 10e5

  # Opens (creating if necessary) the shared large index and populates it when
  # it is empty or when the RELOAD_LARGE_INDEX environment variable is set.
  def setup
    @index = Index.new(:path => INDEX_DIR, :create_if_missing => true, :key => :id)
    create_index! if @index.size == 0 || ENV["RELOAD_LARGE_INDEX"]
  end

  # Releases the index handle opened in setup so that handles do not
  # accumulate across test methods.
  def teardown
    @index.close if @index
  end

  def test_file_index_created
    assert_equal RECORDS, @index.size,
                 "Index size should be #{RECORDS}, is #{@index.size}"
  end

  # Adding a document whose :id is already present must not change the size
  # (the index is opened with :key => :id).
  def test_keys_work
    @index << {:content => "foo", :id => RECORDS - 4}
    assert_equal RECORDS, @index.size,
                 "Index size should be #{RECORDS}, is #{@index.size}"
  end

  # Reads a document stored near the end of the index.
  # NOTE(review): presumably this document lies beyond the 2 GB mark of the
  # stored data, as the test name suggests -- confirm against the index layout.
  def test_read_file_after_two_gigs
    assert @index.reader[RECORDS - 5].load.is_a?(Hash)
  end

  # Fills the index with RECORDS documents. Runs at most once per process,
  # guarded by a class variable, because it is very slow.
  def create_index!
    @@already_built_large_index ||= false
    return if @@already_built_large_index
    @@already_built_large_index = true

    a = "a"
    RECORDS.times do |i|
      # a.succ! advances the token in place ("b", "c", ... "aa", ...), so every
      # record repeats a different token.
      seq = (a.succ! + " ") * RECORD_SIZE
      @index << {:id => i, :content => seq}
      print "i" # progress marker, one per record
      STDOUT.flush
    end
    puts "o"
    @index.optimize
  end
end
|
data/test/test_all.rb
ADDED
data/test/test_helper.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Load-path bootstrap for the test suite.
#
# Paths are expanded to absolute form so that later changes of the working
# directory cannot invalidate them.
test_root = File.expand_path(File.dirname(__FILE__))
$:.unshift test_root
if $test_installed_gem
  # Exercise the installed gem rather than the working copy.
  require 'rubygems'
  require 'ferret'
else
  # Exercise the working copy: Ruby sources in lib/, extension files in ext/.
  $:.unshift File.expand_path('../lib', test_root)
  $:.unshift File.expand_path('../ext', test_root)
end

# Force a UTF-8 locale for the tests.
ENV['LANG'] = "en_US.UTF-8"
ENV['LC_CTYPE'] = "en_US.UTF-8"
|
12
|
+
|
13
|
+
class Float
  # Returns true when +self+ and +o+ are equal, or when their ratio differs
  # from 1 by less than 0.0001 (a relative tolerance).
  #
  # The equality check comes first so that comparing zero with zero is true;
  # the ratio alone would give 0.0/0.0 == NaN and report a mismatch.
  def approx_eql?(o)
    return true if self == o
    (1 - self / o).abs < 0.0001
  end

  # Lets tests write `a =~ b` for approximate float comparison.
  alias :=~ :approx_eql?
end
|
19
|
+
|
20
|
+
require 'test/unit'
|
21
|
+
require 'ferret'
|
22
|
+
require 'unit/index/th_doc' if (defined?(IndexTestHelper).nil?)
|
23
|
+
|
24
|
+
|
25
|
+
# Requires every test file in +dir+ (given relative to this file's directory)
# whose name matches "t[scm]*.rb", i.e. names starting with "ts", "tc" or "tm".
#
# Files are required by absolute path, so resolution does not depend on the
# working directory or on $:, and in sorted order, so the load order is the
# same on every platform and Ruby version.
def load_test_dir(dir)
  pattern = File.expand_path(File.join(File.dirname(__FILE__), dir, "t[scm]*.rb"))
  Dir[pattern].sort.each { |file| require file }
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Flag consulted by test_helper.rb: when set, the helper requires rubygems and
# the installed ferret gem instead of adding the local lib/ and ext/
# directories to the load path.
$test_installed_gem = true
|
@@ -0,0 +1,132 @@
|
|
1
|
+
# Author: Matthew D Moss
|
2
|
+
#
|
3
|
+
# Written for ruby quiz #25
|
4
|
+
#
|
5
|
+
# Spells integers out in romanised Japanese.
#
# Original author's caveats: the counting rules implemented here may not be
# entirely correct, in particular beyond 'hyaku man' (1,000,000). A digit is
# fused with its group word ('gohyaku' rather than 'go hyaku') purely for
# readability.
#
# The returned string may contain trailing or doubled spaces; callers are
# expected to normalise whitespace.
class JapaneseTranslator
  DIGITS = %w(zero ichi ni san yon go roku nana hachi kyu)
  # Index 0 is a placeholder that is never emitted (level 0 has no group word).
  GROUPS = %w(nothingtoseeheremovealong ju hyaku sen)
  MAN = 10000

  # Returns the spoken form of +val+; negatives are prefixed with '- '.
  def to_spoken(val)
    sign = val <=> 0
    return '- ' + to_spoken(-val) if sign == -1
    return DIGITS[0] if sign == 0

    group(val, 0)
  end

  private

  # Spells +val+, where +level+ is the decimal position (0 = units, 1 = tens,
  # 2 = hundreds, 3 = thousands) of its last digit within the current
  # four-digit 'man' block.
  def group(val, level)
    if val >= MAN
      upper = group(val / MAN, 0)
      lower = group(val % MAN, 0)
      return upper + 'man ' + lower
    end

    if val == 0
      ''
    elsif val == 1
      # A lone 1 in a higher position is just the group word ('ju', not 'ichiju').
      level == 0 ? DIGITS[val] : GROUPS[level]
    elsif val >= 2 && val < 10
      suffix = level > 0 ? GROUPS[level] : nil
      DIGITS[val] + suffix.to_s
    else
      # Peel off the last digit; the remaining digits sit one position higher.
      group(val / 10, level + 1) + ' ' + group(val % 10, level)
    end
  end
end
|
46
|
+
|
47
|
+
|
48
|
+
# Spells integers out in formal US English.
#
# Produces 'one thousand two hundred' rather than 'twelve hundred'. The
# optional 'and' is only inserted between the hundreds and the remainder of
# the lowest three-digit group ('one hundred and one'); it is not inserted in
# cases such as 'one thousand one'.
class USEnglishTranslator
  DIGITS = %w(zero one two three four five six seven eight nine)
  TEENS  = %w(ten eleven twelve thirteen fourteen fifteen sixteen
              seventeen eighteen nineteen)
  # Indices 0 and 1 are placeholders; values below 20 never index TENS.
  TENS   = %w(hello world twenty thirty forty fifty sixty seventy
              eighty ninety)
  GROUPS = %w(thousand million billion trillion quadrillion
              quintillion sextillion septillion octillion nonillion
              decillion)
  K = 1000

  # conjunction:: when true (the default) 'and' is emitted after 'hundred'.
  def initialize(conjunction = true)
    @conjunction = conjunction
  end

  # Returns the spoken form of the integer +val+ as a single-space separated
  # string; negatives are prefixed with 'negative '.
  def to_spoken(val)
    case val <=> 0
    when -1
      'negative ' + to_spoken(-val)
    when 0
      DIGITS[0]
    else
      # compact drops the nil produced when a number needs a group name
      # beyond the end of GROUPS.
      group(val, 0).compact.join(' ')
    end
  end

  private

  # Returns the words for +val+, whose lowest three digits form group number
  # +level+ (0 = units, 1 = thousands, ...).
  def group(val, level)
    words = []
    if val >= K
      higher = val / K
      words.concat(group(higher, level + 1))
      # Name the group only when it is non-zero, so that 1_000_000 reads
      # 'one million' rather than 'one million thousand'.
      words << GROUPS[level] unless (higher % K).zero?
    end
    words.concat(under_1000(val % K, level))
  end

  # Words for 0..999. 'and' is requested only in the lowest group and only
  # when a hundreds digit precedes it.
  def under_1000(val, level)
    words = []
    words << DIGITS[val / 100] << 'hundred' if val >= 100
    junction = level.zero? && !words.empty?
    words.concat(under_100(val % 100, junction))
  end

  # Words for 0..99; returns an empty array for 0. +junction+ asks for a
  # leading 'and', which is honoured only if conjunctions are enabled.
  def under_100(val, junction)
    return [] if val.zero?

    words = []
    words << 'and' if @conjunction && junction
    words << case val
             when 1...10 then DIGITS[val]
             when 10...20 then TEENS[val - 10]
             else
               ones = val % 10
               ones.zero? ? TENS[val / 10] : "#{TENS[val / 10]}-#{DIGITS[ones]}"
             end
  end
end
|
105
|
+
|
106
|
+
|
107
|
+
class Integer
  # Returns this integer spelled out by +translator+ (US English unless one is
  # given), with runs of spaces collapsed and surrounding whitespace removed.
  def to_spoken(translator = USEnglishTranslator.new)
    raw = translator.to_spoken(self)
    collapsed = raw.squeeze(' ')
    collapsed.strip
  end
end
|
112
|
+
|
113
|
+
# Demo: when this file is run directly, print a table of sample numbers in
# each available language.
if __FILE__ == $0
  SAMPLES = [ 0, 1, 2, 5, 10, 11, 14, 18, 20, 21, 29, 33, 42, 50, 87, 99,
              100, 101, 110, 167, 199, 200, 201, 276, 300, 314, 500, 610,
              1000, 1039, 1347, 2309, 3098, 23501, 32767, 70000, 5480283,
              2435489238, 234100090000, -42, -2001 ]

  TRANSLATORS = { 'US English' => USEnglishTranslator.new,
                  'Japanese'   => JapaneseTranslator.new }

  TRANSLATORS.each_pair do |lang, translator|
    # Heading: blank line, language name, underline of matching width.
    puts
    puts lang
    puts '-' * lang.length
    SAMPLES.each do |val|
      spoken = val.to_spoken(translator)
      puts format("%12d => %s", val, spoken)
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
$:.unshift('.')
|
2
|
+
require 'monitor'
|
3
|
+
require File.dirname(__FILE__) + "/../test_helper"
|
4
|
+
require File.dirname(__FILE__) + "/number_to_spoken.rb"
|
5
|
+
require 'thread'
|
6
|
+
|
7
|
+
# Stress test: several threads, each with its own Index handle on the same
# directory, randomly add, delete, search and optimize.
class IndexThreadSafetyTest < Test::Unit::TestCase
  include Ferret::Index

  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
  ITERATIONS = 100
  NUM_THREADS = 3
  ANALYZER = Ferret::Analysis::StandardAnalyzer.new()

  # Recreates the index directory (:create => true) before each test.
  def setup
    index = Index.new(:path => INDEX_DIR,
                      :create => true,
                      :analyzer => ANALYZER,
                      :default_field => :content)
    index.close
  end

  # Body of one worker thread. Performs ITERATIONS random operations, roughly
  # 2% optimize, 28% delete, 20% search and 50% add, committing after each.
  #
  # Any exception is printed and then re-raised unchanged so that the joining
  # test sees the real error class and backtrace. The index handle is always
  # closed on exit.
  def indexing_thread()
    index = Index.new(:path => INDEX_DIR,
                      :analyzer => ANALYZER,
                      :auto_flush => true,
                      :default_field => :content)

    ITERATIONS.times do
      choice = rand()

      if choice > 0.98
        do_optimize(index)
      elsif choice > 0.7
        do_delete_doc(index)
      elsif choice > 0.5
        do_search(index)
      else
        do_add_doc(index)
      end
      index.commit
    end
  rescue Exception => e
    puts e
    puts e.backtrace
    raise
  ensure
    index.close if index
  end

  def do_optimize(index)
    puts "Optimizing the index"
    index.optimize
  end

  # Deletes a randomly chosen document number; does nothing on an empty index.
  def do_delete_doc(index)
    return if index.size == 0
    doc_num = rand(index.size)
    puts "Deleting #{doc_num} from index which has#{index.has_deletions? ? "" : " no"} deletions"
    puts "document was already deleted" if (index.deleted?(doc_num))
    index.delete(doc_num)
  end

  # Adds a document whose content is a random number spelled out in words.
  def do_add_doc(index)
    n = rand(0xFFFFFFFF)
    d = {:id => n, :content => n.to_spoken}
    puts("Adding #{n}")
    index << d
  end

  # Searches for a random number's spoken form and prints up to three hits.
  def do_search(index)
    n = rand(0xFFFFFFFF)
    puts("Searching for #{n}")
    hits = index.search_each(n.to_spoken, :num_docs => 3) do |d, s|
      puts "Hit for #{n}: #{index[d][:id]} - #{s}"
    end
    puts("Searched for #{n}: total = #{hits}")
  end

  # Runs NUM_THREADS workers and waits for all of them; join re-raises any
  # exception a worker died with.
  def test_threading
    threads = []
    NUM_THREADS.times do
      threads << Thread.new { indexing_thread }
    end

    threads.each { |t| t.join }
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../test_helper"
|
2
|
+
require File.dirname(__FILE__) + "/number_to_spoken.rb"
|
3
|
+
require 'thread'
|
4
|
+
|
5
|
+
# Stress test: one thread searches while another adds documents, both through
# the same shared Index object.
class IndexThreadSafetyReadWriteTest < Test::Unit::TestCase
  include Ferret::Index

  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
  ITERATIONS = 10000
  ANALYZER = Ferret::Analysis::Analyzer.new()

  # Recreates the index (:create => true) and keeps it open for both threads.
  def setup
    @index = Index.new(:path => INDEX_DIR,
                       :create => true,
                       :analyzer => ANALYZER,
                       :default_field => :content)
  end

  # Closes the shared index. It may already be nil if a worker failed.
  def teardown
    @index.close if @index
  end

  # Worker: ITERATIONS searches. On failure the error is printed, @index is
  # cleared (so the other worker fails on its next call) and the error is
  # re-raised.
  def search_thread()
    ITERATIONS.times do
      do_search()
      # NOTE(review): rand(1) always returns the Integer 0, so this is
      # sleep(0). If a random pause was intended it would be sleep(rand), but
      # that would lengthen the run considerably -- confirm before changing.
      sleep(rand(1))
    end
  rescue => e
    puts e
    puts e.backtrace
    @index = nil
    raise e
  end

  # Worker: ITERATIONS document additions. Failure handling matches
  # search_thread.
  def index_thread()
    ITERATIONS.times do
      do_add_doc()
      sleep(rand(1)) # see NOTE(review) in search_thread
    end
  rescue => e
    puts e
    puts e.backtrace
    @index = nil
    raise e
  end

  # Adds a document whose content is a random number spelled out in words.
  # Errors propagate to index_thread, which logs them once.
  def do_add_doc
    n = rand(0xFFFFFFFF)
    d = {:id => n.to_s, :content => n.to_spoken}
    puts("Adding #{n}")
    @index << d
  end

  # Searches for a random number's spoken form and prints up to three hits.
  def do_search
    n = rand(0xFFFFFFFF)
    puts("Searching for #{n}")
    hits = @index.search_each(n.to_spoken, :num_docs => 3) do |d, s|
      puts "Hit for #{n}: #{@index[d]["id"]} - #{s}"
    end
    puts("Searched for #{n}: total = #{hits}")
  end

  # Runs one searcher and one indexer concurrently and waits for both.
  def test_threading
    threads = []
    threads << Thread.new { search_thread }
    threads << Thread.new { index_thread }

    threads.each { |t| t.join }
  end
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../test_helper"
|
2
|
+
require File.join(File.dirname(__FILE__), "number_to_spoken.rb")
|
3
|
+
require 'thread'
|
4
|
+
|
5
|
+
# Stand-alone stress driver (not a Test::Unit case): one optional writer
# thread and three searcher threads work on the same index directory.
#
# options:: Hash; :read_only skips the writer thread, :add makes the writer
#           append to an existing index instead of recreating it.
class ThreadSafetyTest
  include Ferret::Index
  include Ferret::Search
  include Ferret::Store
  include Ferret

  def initialize(options)
    @options = options
  end

  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
  ANALYZER = Ferret::Analysis::WhiteSpaceAnalyzer.new()
  ITERATIONS = 1000
  QUERY_PARSER = Ferret::QueryParser.new(:analyzer => ANALYZER,
                                         :default_field => 'contents')
  # Searcher shared by the threads started with use_global = true.
  @@searcher = nil

  # Writer thread body: adds 400 * ITERATIONS documents, randomly toggling
  # compound-file mode, and closes and reopens the writer every
  # reopen_interval documents. Errors are printed and re-raised.
  def run_index_thread(writer)
    reopen_interval = 30 + rand(60)

    (400 * ITERATIONS).times do |i|
      n = rand(0xFFFFFFFF)
      d = {:id => n.to_s, :contents => n.to_spoken}
      puts("Adding #{n}")

      # Switch between single and multiple file segments
      writer.use_compound_file = (rand < 0.5)

      writer << d

      if (i % reopen_interval).zero?
        writer.close()
        writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER)
      end
    end

    writer.close()
  rescue => e
    puts e
    puts e.backtrace
    raise e
  end

  # Searcher thread body: runs 50 * ITERATIONS random searches, replacing the
  # searcher every reopen_interval searches.
  #
  # use_global:: when true the shared @@searcher is used and replaced (the old
  #              one is not closed here, since other threads may still be
  #              using it); otherwise the thread owns a private searcher,
  #              which is closed before each reopen and when the thread exits.
  def run_search_thread(use_global)
    reopen_interval = 10 + rand(20)
    searcher = use_global ? nil : Searcher.new(INDEX_DIR)

    (50 * ITERATIONS).times do |i|
      search_for(rand(0xFFFFFFFF), searcher || @@searcher)
      next unless (i % reopen_interval).zero?

      if use_global
        @@searcher = Searcher.new(INDEX_DIR)
      else
        # Drop the reference first so the ensure clause never closes the same
        # searcher twice if close or the reopen raises.
        stale = searcher
        searcher = nil
        stale.close()
        searcher = Searcher.new(INDEX_DIR)
      end
    end
  rescue => e
    puts e
    puts e.backtrace
    raise e
  ensure
    searcher.close() if searcher
  end

  # Searches for the spoken form of +n+ and prints the total and top 3 hits.
  def search_for(n, searcher)
    puts("Searching for #{n}")
    topdocs = searcher.search(QUERY_PARSER.parse(n.to_spoken), :limit => 3)
    puts("Search for #{n}: total = #{topdocs.total_hits}")
    topdocs.hits.each do |hit|
      puts "Hit for #{n}: #{searcher.reader[hit.doc]["id"]} - #{hit.score}"
    end
  end

  # Starts the writer (unless :read_only) and three searcher threads, then
  # waits for all of them.
  def run_test_threads
    threads = []
    unless @options[:read_only]
      writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER,
                               :create => !@options[:add])

      threads << Thread.new { run_index_thread(writer) }
      sleep(1) # pause after starting the writer before any searcher is opened
    end

    threads << Thread.new { run_search_thread(false) }

    @@searcher = Searcher.new(INDEX_DIR)
    threads << Thread.new { run_search_thread(true) }
    threads << Thread.new { run_search_thread(true) }

    threads.each { |t| t.join }
  end
end
|
104
|
+
|
105
|
+
|
106
|
+
# Command-line entry point: parse options and run the threaded stress test.
if $0 == __FILE__
  require 'optparse'

  OPTIONS = {
    :all => false,
    :read_only => false,
  }

  ARGV.options do |opts|
    script_name = File.basename($0)
    opts.banner = "Usage: ruby #{script_name} [options]"

    opts.separator ""

    # Each switch sets the option key that matches its own name (the two
    # handlers were previously swapped).
    opts.on("-r", "--read-only", "Read Only.") { OPTIONS[:read_only] = true }
    opts.on("-a", "--all", "All.") { OPTIONS[:all] = true }

    opts.separator ""

    opts.on("-h", "--help",
            "Show this help message.") { puts opts; exit }

    opts.parse!
  end

  tst = ThreadSafetyTest.new(OPTIONS)
  tst.run_test_threads
end
|