jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
@@ -0,0 +1,46 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
# Long-running test that builds an on-disk index of RECORDS documents, each
# holding RECORD_SIZE repetitions of a short token, and then checks that the
# index can still be read, keyed and sized correctly.
#
# The index is only (re)built when it is empty or when the environment
# variable RELOAD_LARGE_INDEX is set.
class SampleLargeTest < Test::Unit::TestCase
  include Ferret::Index
  include Ferret::Search
  include Ferret::Store
  include Ferret::Utils

  INDEX_DIR   = File.dirname(__FILE__) + "/../../temp/largefile"
  RECORDS     = 750
  # Repeat count for String#*; kept as an Integer (same value as the former
  # Float literal 10e5).
  RECORD_SIZE = 1_000_000

  # Opens (creating if necessary) the index keyed on :id and populates it
  # when it is empty or a reload was requested.
  def setup
    @index = Index.new(:path => INDEX_DIR, :create_if_missing => true, :key => :id)
    create_index! if @index.size == 0 || ENV["RELOAD_LARGE_INDEX"]
  end

  # Closes the index opened by setup so each test releases its handle.
  def teardown
    @index.close if @index
    @index = nil
  end

  def test_file_index_created
    assert @index.size == RECORDS, "Index size should be #{RECORDS}, is #{@index.size}"
  end

  # Adding a document whose :id already exists must not grow the index,
  # because the index was opened with :key => :id.
  def test_keys_work
    @index << {:content => "foo", :id => RECORDS - 4}
    assert @index.size == RECORDS, "Index size should be #{RECORDS}, is #{@index.size}"
  end

  def test_read_file_after_two_gigs
    assert @index.reader[RECORDS - 5].load.is_a?(Hash)
  end

  # Fills the index with RECORDS documents and optimizes it. A class-level
  # flag makes sure the build happens at most once per process even though
  # setup runs before every test.
  def create_index!
    @@already_built_large_index ||= false
    return if @@already_built_large_index
    @@already_built_large_index = true
    a = "a"
    RECORDS.times do |i|
      # succ! mutates `a`, so every record gets a different token.
      seq = (a.succ! + " ") * RECORD_SIZE
      @index << {:id => i, :content => seq}
      print "i"
      STDOUT.flush
    end
    puts "o"
    @index.optimize
  end
end
data/test/test_all.rb ADDED
@@ -0,0 +1,5 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require 'test_helper.rb'
3
+
4
+ load_test_dir("unit")
5
+ #load_test_dir("functional")
@@ -0,0 +1,29 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ if $test_installed_gem
3
+ require 'rubygems'
4
+ require 'ferret'
5
+ else
6
+ $:.unshift File.join(File.dirname(__FILE__), '../lib')
7
+ $:.unshift File.join(File.dirname(__FILE__), '../ext')
8
+ end
9
+
10
+ ENV['LANG'] = "en_US.UTF-8"
11
+ ENV['LC_CTYPE'] = "en_US.UTF-8"
12
+
13
class Float
  # Returns true when +self+ and +o+ are equal or differ by less than
  # 0.01% relative to +o+.
  #
  # The equality check comes first so that comparing zero with zero (where
  # the ratio self/o is NaN) is reported as approximately equal.
  def approx_eql?(o)
    return true if self == o
    (1 - self / o).abs < 0.0001
  end
  alias :=~ :approx_eql?
end
19
+
20
+ require 'test/unit'
21
+ require 'ferret'
22
+ require 'unit/index/th_doc' if (defined?(IndexTestHelper).nil?)
23
+
24
+
25
# Requires every file named t[scm]*.rb found in +dir+, where +dir+ is taken
# relative to the directory containing this file.
#
# Paths are expanded to absolute form before being required so the lookup
# does not depend on the current working directory or on $LOAD_PATH, and the
# list is sorted so files load in a stable order.
def load_test_dir(dir)
  pattern = File.expand_path(File.join(File.dirname(__FILE__), dir, "t[scm]*.rb"))
  Dir[pattern].sort.each do |file|
    require file
  end
end
@@ -0,0 +1 @@
1
+ $test_installed_gem = true
@@ -0,0 +1,132 @@
1
+ # Author: Matthew D Moss
2
+ #
3
+ # Written for ruby quiz #25
4
+ #
5
# Spells integers out in romanised Japanese.
#
# The original author notes that the rules beyond 'hyaku man' (1,000,000)
# may not be correct, and that a digit is deliberately fused with its group
# word ('gohyaku' rather than 'go hyaku').
class JapaneseTranslator
  DIGITS = %w(zero ichi ni san yon go roku nana hachi kyu)
  GROUPS = %w(nothingtoseeheremovealong ju hyaku sen)
  MAN    = 10000

  # Returns the spoken form of +val+; negative values are prefixed with '- '.
  def to_spoken(val)
    sign = val <=> 0
    if sign == -1
      '- ' + to_spoken(-val)
    elsif sign == 0
      DIGITS[0]
    else
      group(val, 0)
    end
  end

  private

  # Spells +val+, where +level+ is the power of ten (0..3) that the lowest
  # digit of +val+ occupies inside the current four-digit 'man' group.
  def group(val, level)
    if val >= MAN
      upper, lower = val.divmod(MAN)
      return group(upper, 0) + 'man ' + group(lower, 0)
    end

    return '' if val == 0

    if val == 1
      # A lone 1 is spoken as the bare group word above the units position.
      return level == 0 ? DIGITS[val] : GROUPS[level]
    end

    if (2...10).include?(val)
      suffix = level > 0 ? GROUPS[level] : nil
      return DIGITS[val] + suffix.to_s
    end

    leading, last_digit = val.divmod(10)
    group(leading, level + 1) + ' ' + group(last_digit, level)
  end
end
46
+
47
+
48
# Spells integers out in formal US English.
#
# Produces 'one thousand two hundred' rather than 'twelve hundred'. The
# optional 'and' is only inserted between the hundreds and the remainder of
# the lowest three-digit group, so it is sometimes missed.
#
# The returned string may contain leading or doubled spaces; Integer#to_spoken
# in this file normalises them.
class USEnglishTranslator
  DIGITS = %w(zero one two three four five six seven eight nine)
  TEENS  = %w(ten eleven twelve thirteen fourteen fifteen sixteen
              seventeen eighteen nineteen)
  # Indexes 0 and 1 are placeholders; tens below twenty never reach TENS.
  TENS   = %w(hello world twenty thirty forty fifty sixty seventy
              eighty ninety)
  GROUPS = %w(thousand million billion trillion quadrillion
              quintillion sextillion septillion octillion nonillion
              decillion)
  K = 1000

  # conjunction: when true, 'and' is inserted after 'hundred'.
  def initialize(conjunction = true)
    @conjunction = conjunction
  end

  # Returns the spoken form of +val+; negative values are prefixed with
  # 'negative '.
  def to_spoken(val)
    case val <=> 0
    when -1
      'negative ' + to_spoken(-val)
    when 0
      DIGITS[0]
    else
      group(val, 0).flatten.join(' ')
    end
  end

  private

  # Returns a nested array of words for +val+. +level+ is the number of
  # three-digit groups that lie below the lowest group of +val+.
  def group(val, level)
    return [under_1000(val, level)] if val < K

    higher, lower = val.divmod(K)
    words = group(higher, level + 1)
    # GROUPS[level] names the lowest three digits of +higher+. Skip the name
    # when those digits are all zero, otherwise 1_000_000 would read
    # "one million thousand".
    words << GROUPS[level] unless higher % K == 0
    words << under_1000(lower, level)
  end

  # Words for a value in 0...1000.
  def under_1000(val, level)
    has_hundreds = val >= 100
    words = has_hundreds ? [DIGITS[val / 100], 'hundred'] : []
    words << under_100(val % 100, level == 0 && has_hundreds)
  end

  # Words for a value in 0...100. +junction+ says whether an 'and' may
  # precede the words. The leading array slot is nil when no 'and' is used.
  def under_100(val, junction)
    return [] if val == 0

    words = [('and' if @conjunction && junction)]
    case val
    when 1...10
      words << DIGITS[val]
    when 10...20
      words << TEENS[val - 10]
    else
      d = val % 10
      words << (TENS[val / 10] + ('-' + DIGITS[d] if d != 0).to_s)
    end
  end
end
105
+
106
+
107
class Integer
  # Returns this integer spelled out by +translator+ (US English by
  # default), with runs of spaces collapsed and surrounding whitespace
  # removed.
  def to_spoken(translator = USEnglishTranslator.new)
    raw = translator.to_spoken(self)
    collapsed = raw.squeeze(' ')
    collapsed.strip
  end
end
112
+
113
# Demo driver: when this file is run directly, print every sample number
# spelled out by each translator.
if __FILE__ == $0
  SAMPLES = [ 0, 1, 2, 5, 10, 11, 14, 18, 20, 21, 29, 33, 42, 50, 87, 99,
              100, 101, 110, 167, 199, 200, 201, 276, 300, 314, 500, 610,
              1000, 1039, 1347, 2309, 3098, 23501, 32767, 70000, 5480283,
              2435489238, 234100090000, -42, -2001 ]

  TRANSLATORS = { 'US English' => USEnglishTranslator.new,
                  'Japanese'   => JapaneseTranslator.new }

  TRANSLATORS.each_pair do |language, speaker|
    # Blank line, language name, then an underline of matching width.
    puts
    puts language
    puts '-' * language.length
    SAMPLES.each do |number|
      puts format("%12d => %s", number, number.to_spoken(speaker))
    end
  end
end
@@ -0,0 +1,88 @@
1
+ $:.unshift('.')
2
+ require 'monitor'
3
+ require File.dirname(__FILE__) + "/../test_helper"
4
+ require File.dirname(__FILE__) + "/number_to_spoken.rb"
5
+ require 'thread'
6
+
7
# Runs NUM_THREADS threads that each open their own Index on the same
# directory and perform a random mix of add, search, delete and optimize
# operations, committing after every operation.
class IndexThreadSafetyTest < Test::Unit::TestCase
  include Ferret::Index

  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
  ITERATIONS = 100
  NUM_THREADS = 3
  ANALYZER = Ferret::Analysis::StandardAnalyzer.new()

  # Creates a fresh, empty index on disk and closes it again.
  def setup
    index = Index.new(:path => INDEX_DIR,
                      :create => true,
                      :analyzer => ANALYZER,
                      :default_field => :content)
    index.close
  end

  # Body of one worker thread. Any error is printed with its backtrace and
  # then re-raised unchanged so the joining thread sees the real failure.
  # The thread's index handle is closed on the way out.
  def indexing_thread()
    index = Index.new(:path => INDEX_DIR,
                      :analyzer => ANALYZER,
                      :auto_flush => true,
                      :default_field => :content)

    ITERATIONS.times do
      choice = rand()

      # Roughly: 2% optimize, 28% delete, 20% search, 50% add.
      if choice > 0.98
        do_optimize(index)
      elsif choice > 0.7
        do_delete_doc(index)
      elsif choice > 0.5
        do_search(index)
      else
        do_add_doc(index)
      end
      index.commit
    end
  rescue StandardError => e
    puts e
    puts e.backtrace
    raise
  ensure
    index.close if index
  end

  def do_optimize(index)
    puts "Optimizing the index"
    index.optimize
  end

  # Deletes a randomly chosen document number; does nothing on an empty index.
  def do_delete_doc(index)
    return if index.size == 0
    doc_num = rand(index.size)
    puts "Deleting #{doc_num} from index which has#{index.has_deletions? ? "" : " no"} deletions"
    puts "document was already deleted" if (index.deleted?(doc_num))
    index.delete(doc_num)
  end

  # Adds a document whose content is a random number spelled out in words.
  def do_add_doc(index)
    n = rand(0xFFFFFFFF)
    d = {:id => n, :content => n.to_spoken}
    puts("Adding #{n}")
    index << d
  end

  # Searches for a random number spelled out in words and prints the hits.
  def do_search(index)
    n = rand(0xFFFFFFFF)
    puts("Searching for #{n}")
    hits = index.search_each(n.to_spoken, :num_docs => 3) do |d, s|
      puts "Hit for #{n}: #{index[d][:id]} - #{s}"
    end
    puts("Searched for #{n}: total = #{hits}")
  end

  def test_threading
    threads = []
    NUM_THREADS.times do
      threads << Thread.new { indexing_thread }
    end

    threads.each { |t| t.join }
  end
end
@@ -0,0 +1,73 @@
1
+ require File.dirname(__FILE__) + "/../test_helper"
2
+ require File.dirname(__FILE__) + "/number_to_spoken.rb"
3
+ require 'thread'
4
+
5
# Shares a single Index between one searching thread and one indexing
# thread and runs both for ITERATIONS rounds.
class IndexThreadSafetyReadWriteTest < Test::Unit::TestCase
  include Ferret::Index

  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
  ITERATIONS = 10000
  ANALYZER = Ferret::Analysis::Analyzer.new()

  # Creates a fresh index that both worker threads share through @index.
  def setup
    @index = Index.new(:path => INDEX_DIR,
                       :create => true,
                       :analyzer => ANALYZER,
                       :default_field => :content)
  end

  # Repeatedly searches the shared index.
  # NOTE(review): Kernel#rand(1) always returns 0, so the sleep is a
  # zero-length pause - confirm whether a random delay was intended.
  def search_thread()
    guarded do
      ITERATIONS.times do
        do_search()
        sleep(rand(1))
      end
    end
  end

  # Repeatedly adds documents to the shared index.
  def index_thread()
    guarded do
      ITERATIONS.times do
        do_add_doc()
        sleep(rand(1))
      end
    end
  end

  # Adds a document whose content is a random number spelled out in words.
  def do_add_doc
    n = rand(0xFFFFFFFF)
    d = {:id => n.to_s, :content => n.to_spoken}
    puts("Adding #{n}")
    guarded { @index << d }
  end

  # Searches for a random number spelled out in words and prints the hits.
  def do_search
    n = rand(0xFFFFFFFF)
    puts("Searching for #{n}")
    hits = @index.search_each(n.to_spoken, :num_docs => 3) do |d, s|
      puts "Hit for #{n}: #{@index[d]["id"]} - #{s}"
    end
    puts("Searched for #{n}: total = #{hits}")
  end

  def test_threading
    workers = [
      Thread.new { search_thread },
      Thread.new { index_thread },
    ]
    workers.each { |t| t.join }
  end

  private

  # Runs the block; on a StandardError prints the error and its backtrace,
  # drops the shared index reference and re-raises the same exception.
  def guarded
    yield
  rescue => e
    puts e
    puts e.backtrace
    @index = nil
    raise e
  end
end
@@ -0,0 +1,133 @@
1
+ require File.dirname(__FILE__) + "/../test_helper"
2
+ require File.join(File.dirname(__FILE__), "number_to_spoken.rb")
3
+ require 'thread'
4
+
5
# Stress driver (not a Test::Unit case): one thread writes documents through
# an IndexWriter while several threads search the same index directory, one
# with a private Searcher and two sharing the class-level @@searcher.
class ThreadSafetyTest
  include Ferret::Index
  include Ferret::Search
  include Ferret::Store
  include Ferret

  # options: Hash read with the keys :read_only and :add.
  def initialize(options)
    @options = options
  end

  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
  ANALYZER = Ferret::Analysis::WhiteSpaceAnalyzer.new()
  ITERATIONS = 1000
  QUERY_PARSER = Ferret::QueryParser.new(:analyzer => ANALYZER,
                                         :default_field => 'contents')
  @@searcher = nil

  # Adds 400*ITERATIONS random documents through +writer+, closing and
  # reopening the writer every reopen_interval documents. Whatever writer is
  # open when the method exits (normally or through an error) is closed.
  def run_index_thread(writer)
    reopen_interval = 30 + rand(60)

    (400*ITERATIONS).times do |i|
      n = rand(0xFFFFFFFF)
      d = {:id => n.to_s, :contents => n.to_spoken}
      puts("Adding #{n}")

      # Switch between single and multiple file segments
      writer.use_compound_file = (rand < 0.5)

      writer << d

      if (i % reopen_interval == 0)
        writer.close()
        # Forget the closed writer first so a failing reopen cannot lead to
        # a second close in the ensure clause.
        writer = nil
        writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER)
      end
    end
  rescue => e
    puts e
    puts e.backtrace
    raise e
  ensure
    writer.close() if writer
  end

  # Runs 50*ITERATIONS searches. With +use_global+ the shared @@searcher is
  # used and periodically replaced; otherwise a private Searcher is used,
  # periodically reopened, and closed when the method exits.
  def run_search_thread(use_global)
    reopen_interval = 10 + rand(20)

    searcher = nil
    searcher = Searcher.new(INDEX_DIR) unless use_global

    (50*ITERATIONS).times do |i|
      search_for(rand(0xFFFFFFFF), (use_global ? @@searcher : searcher))
      if (i%reopen_interval == 0)
        if use_global
          # The previous shared searcher is left open here because another
          # thread may still be using it.
          @@searcher = Searcher.new(INDEX_DIR)
        else
          searcher.close()
          searcher = nil
          searcher = Searcher.new(INDEX_DIR)
        end
      end
    end
  rescue => e
    puts e
    puts e.backtrace
    raise e
  ensure
    searcher.close() if searcher
  end

  # Searches for +n+ spelled out in words and prints up to three hits.
  def search_for(n, searcher)
    puts("Searching for #{n}")
    topdocs = searcher.search(QUERY_PARSER.parse(n.to_spoken), :limit => 3)
    puts("Search for #{n}: total = #{topdocs.total_hits}")
    topdocs.hits.each do |hit|
      puts "Hit for #{n}: #{searcher.reader[hit.doc]["id"]} - #{hit.score}"
    end
  end

  # Starts the writer thread (unless :read_only is set) and three search
  # threads, then waits for all of them.
  # NOTE(review): the index is recreated unless @options[:add] is truthy;
  # the command-line driver in this file only defines :all and :read_only -
  # confirm which key is intended.
  def run_test_threads
    threads = []
    unless @options[:read_only]
      writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER,
                               :create => !@options[:add])

      threads << Thread.new { run_index_thread(writer) }
      # Give the writer a head start before the searchers open the index.
      sleep(1)
    end

    threads << Thread.new { run_search_thread(false)}

    @@searcher = Searcher.new(INDEX_DIR)
    threads << Thread.new { run_search_thread(true)}

    threads << Thread.new { run_search_thread(true)}

    threads.each {|t| t.join}
  end
end
104
+
105
+
106
# Command-line driver: parse the flags into OPTIONS and run the threaded
# stress test when this file is executed directly.
if $0 == __FILE__
  require 'optparse'

  OPTIONS = {
    :all        => false,
    :read_only  => false,
  }

  ARGV.options do |opts|
    script_name = File.basename($0)
    opts.banner = "Usage: ruby #{script_name} [options]"

    opts.separator ""

    # Each flag sets the option key that matches its own name.
    opts.on("-r", "--read-only", "Read Only.") { OPTIONS[:read_only] = true }
    opts.on("-a", "--all", "All.") { OPTIONS[:all] = true }

    opts.separator ""

    opts.on("-h", "--help",
            "Show this help message.") { puts opts; exit }

    opts.parse!
  end

  tst = ThreadSafetyTest.new(OPTIONS)
  tst.run_test_threads
end