ferret 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. data/Rakefile +1 -1
  2. data/TODO +3 -0
  3. data/ext/dummy.exe +0 -0
  4. data/lib/ferret.rb +1 -1
  5. data/lib/ferret/analysis/token.rb +6 -0
  6. data/lib/ferret/analysis/tokenizers.rb +5 -5
  7. data/lib/ferret/document/document.rb +10 -13
  8. data/lib/ferret/index/compound_file_io.rb +12 -9
  9. data/lib/ferret/index/field_infos.rb +0 -6
  10. data/lib/ferret/index/index.rb +220 -102
  11. data/lib/ferret/index/index_reader.rb +22 -2
  12. data/lib/ferret/index/index_writer.rb +55 -14
  13. data/lib/ferret/index/multi_reader.rb +279 -279
  14. data/lib/ferret/index/segment_infos.rb +3 -3
  15. data/lib/ferret/index/segment_merger.rb +7 -6
  16. data/lib/ferret/index/segment_reader.rb +23 -7
  17. data/lib/ferret/index/segment_term_enum.rb +6 -7
  18. data/lib/ferret/index/term_buffer.rb +3 -5
  19. data/lib/ferret/index/term_doc_enum.rb +7 -2
  20. data/lib/ferret/index/term_infos_io.rb +15 -8
  21. data/lib/ferret/query_parser/query_parser.tab.rb +49 -45
  22. data/lib/ferret/search/boolean_query.rb +3 -4
  23. data/lib/ferret/search/boolean_scorer.rb +11 -11
  24. data/lib/ferret/search/caching_wrapper_filter.rb +1 -1
  25. data/lib/ferret/search/disjunction_sum_scorer.rb +9 -7
  26. data/lib/ferret/search/field_cache.rb +1 -2
  27. data/lib/ferret/search/field_sorted_hit_queue.rb +1 -1
  28. data/lib/ferret/search/fuzzy_term_enum.rb +64 -58
  29. data/lib/ferret/search/index_searcher.rb +16 -9
  30. data/lib/ferret/search/prefix_query.rb +7 -0
  31. data/lib/ferret/search/query_filter.rb +1 -1
  32. data/lib/ferret/search/term_scorer.rb +5 -1
  33. data/lib/ferret/search/top_docs.rb +12 -0
  34. data/lib/ferret/store/buffered_index_io.rb +5 -6
  35. data/lib/ferret/store/fs_store.rb +47 -33
  36. data/lib/ferret/store/ram_store.rb +2 -2
  37. data/lib/ferret/utils.rb +1 -0
  38. data/lib/ferret/utils/bit_vector.rb +20 -2
  39. data/lib/ferret/utils/thread_local.rb +28 -0
  40. data/lib/ferret/utils/weak_key_hash.rb +11 -2
  41. data/test/benchmark/tb_rw_vint.rb +1 -1
  42. data/test/functional/thread_safety_index_test.rb +81 -0
  43. data/test/functional/thread_safety_test.rb +137 -0
  44. data/test/test_all.rb +3 -7
  45. data/test/test_helper.rb +2 -1
  46. data/test/unit/index/tc_compound_file_io.rb +2 -2
  47. data/test/unit/index/tc_index.rb +128 -6
  48. data/test/unit/index/tc_index_reader.rb +1 -1
  49. data/test/unit/index/tc_segment_infos.rb +1 -1
  50. data/test/unit/index/th_doc.rb +1 -1
  51. data/test/unit/search/tc_index_searcher.rb +6 -0
  52. data/test/unit/store/tc_fs_store.rb +3 -3
  53. data/test/unit/utils/tc_bit_vector.rb +8 -0
  54. data/test/unit/utils/tc_thread.rb +61 -0
  55. data/test/unit/utils/tc_weak_key_hash.rb +2 -2
  56. data/test/utils/number_to_spoken.rb +132 -0
  57. metadata +7 -2
@@ -597,7 +597,7 @@ class IndexReaderTest < Test::Unit::TestCase
597
597
  def test_ir_read_while_optimizing_on_disk()
598
598
  dpath = File.join(File.dirname(__FILE__),
599
599
  '../../temp/fsdir')
600
- fs_dir = Ferret::Store::FSDirectory.get_directory(dpath, true)
600
+ fs_dir = Ferret::Store::FSDirectory.new(dpath, true)
601
601
 
602
602
  iw = IndexWriter.new(fs_dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
603
603
  docs = IndexTestHelper.prepare_ir_test_docs()
@@ -63,7 +63,7 @@ class SegmentInfoTest < Test::Unit::TestCase
63
63
  assert_equal(si.name, "seg1")
64
64
  @dir.close()
65
65
  @dpath = File.dirname(__FILE__) + '/../../temp/fsdir'
66
- @dir = Ferret::Store::FSDirectory.get_directory(@dpath, true)
66
+ @dir = Ferret::Store::FSDirectory.new(@dpath, true)
67
67
  si.name = "seg2"
68
68
  si.doc_count += 2
69
69
  si.directory = @dir
@@ -234,7 +234,7 @@ module IndexTestHelper
234
234
  doc.boost = i+1
235
235
 
236
236
  fields.each_pair do |field, text|
237
- doc << Field.new(field, text, Field::Store::NO, Field::Index::TOKENIZED, Field::TermVector::NO, i+1)
237
+ doc << Field.new(field, text, Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO, false)
238
238
  end
239
239
  docs << doc
240
240
  end
@@ -46,6 +46,12 @@ class IndexSearcherTest < Test::Unit::TestCase
46
46
  end
47
47
  end
48
48
 
49
+ def test_get_doc()
50
+ assert_equal(18, @is.max_doc)
51
+ assert_equal("20050930", @is.doc(0).values(:date))
52
+ assert_equal("cat1/sub2/subsub2", @is.doc(4)[:cat])
53
+ end
54
+
49
55
  def test_term_query
50
56
  tq = TermQuery.new(Term.new("field", "word2"));
51
57
  tq.boost = 100
@@ -26,7 +26,7 @@ class FSStoreTest < Test::Unit::TestCase
26
26
  def setup
27
27
  @dpath = File.join(File.dirname(__FILE__),
28
28
  '../../temp/fsdir')
29
- @dir = FSDirectory.get_directory(@dpath, true)
29
+ @dir = FSDirectory.new(@dpath, true)
30
30
  end
31
31
 
32
32
  def teardown
@@ -39,12 +39,12 @@ class FSStoreTest < Test::Unit::TestCase
39
39
  '/../../temp/cachetest')
40
40
  assert(! FSDirectory.directory_cache[dir_path],
41
41
  "this directory should not be cached yet")
42
- @dir1 = FSDirectory.get_directory(dir_path, true)
42
+ @dir1 = FSDirectory.new(dir_path, true)
43
43
  assert(FSDirectory.directory_cache[dir_path],
44
44
  "this directory should now be cached")
45
45
  assert_equal(@dir1.ref_count, 1,
46
46
  "There is one reference so the refcount should now be 1")
47
- @dir2 = FSDirectory.get_directory(dir_path, true)
47
+ @dir2 = FSDirectory.new(dir_path, true)
48
48
  assert(@dir1 === @dir2,
49
49
  "The directory should be cached so the same directory object should have been returned")
50
50
  assert_equal(@dir1.ref_count, 2,
@@ -4,6 +4,14 @@ require File.dirname(__FILE__) + "/../../test_helper"
4
4
  class BitVectorTest < Test::Unit::TestCase
5
5
  include Ferret::Utils
6
6
 
7
+ def test_bignum_conversion()
8
+ j = 256
9
+ 10.times do
10
+ j *= j
11
+ assert_equal(j, BitVector.string_to_bignum(BitVector.bignum_to_string(j)))
12
+ end
13
+ end
14
+
7
15
  def test_bv()
8
16
  bv = BitVector.new
9
17
  assert_equal(0, bv.count)
@@ -0,0 +1,61 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+ require 'thread'
3
+
4
+
5
+ class ThreadTest < Test::Unit::TestCase
6
+ include Ferret::Utils
7
+
8
+ NUM_THREADS = 100
9
+
10
+ def test_basic_get_and_set()
11
+ Thread.current.clear_local
12
+ b = "hello"
13
+ Thread.current.set_local(b, "dave")
14
+ assert_equal("dave", Thread.current.get_local(b))
15
+ end
16
+
17
+ def test_objects_die
18
+ Thread.current.clear_local
19
+ a = []
20
+ 10.times {|i| a[i] = "#{i}"; Thread.current.set_local(a[i], i) }
21
+ 10.times {|i| assert_equal(i, Thread.current.get_local(a[i])) }
22
+ assert_equal(10, Thread.current.local_size)
23
+ GC.start
24
+ assert_equal(10, Thread.current.local_size)
25
+ 10.times {|i| a[i] = nil; }
26
+ #puts w
27
+
28
+ # this is a hack to get the GC to collect the last ref created above
29
+ x = WeakKeyHash.new()
30
+ 10.times {|i| a[i] = "#{i}"; x[a[i]] = i }
31
+
32
+ assert_equal(10, Thread.current.local_size)
33
+ GC.start
34
+ assert(0, Thread.current.local_size)
35
+ end
36
+
37
+ class ThreadTester
38
+ def initialize(val)
39
+ Thread.current.set_local(self, val)
40
+ end
41
+ def inc
42
+ val = Thread.current.get_local(self) + 1
43
+ Thread.current.set_local(self, val)
44
+ return val
45
+ end
46
+ end
47
+
48
+ def single_thread
49
+ tt = ThreadTester.new(start = rand(10000000))
50
+ ((start+1)..start+11).each {|i| assert_equal(i, tt.inc) }
51
+ end
52
+
53
+ def test_threads_dont_share
54
+ threads = []
55
+ NUM_THREADS.times do
56
+ threads << Thread.new { single_thread }
57
+ end
58
+
59
+ threads.each {|t| t.join}
60
+ end
61
+ end
@@ -4,11 +4,12 @@ require File.dirname(__FILE__) + "/../../test_helper"
4
4
  class WeakKeyHashTest < Test::Unit::TestCase
5
5
  include Ferret::Utils
6
6
 
7
- def test_marshalling()
7
+ def test_objects_are_destroyed()
8
8
  w = WeakKeyHash.new()
9
9
  a = []
10
10
  10.times {|i| a[i] = "#{i}"; w[a[i]] = i }
11
11
  10.times {|i| assert_equal(i, w[a[i]]) }
12
+ assert_equal(10, w.size)
12
13
  10.times {|i| a[i] = nil; }
13
14
  #puts w
14
15
 
@@ -16,7 +17,6 @@ class WeakKeyHashTest < Test::Unit::TestCase
16
17
  x = WeakKeyHash.new()
17
18
  10.times {|i| a[i] = "#{i}"; x[a[i]] = i }
18
19
 
19
- assert_equal(10, w.size)
20
20
  GC.start
21
21
  #puts w.size
22
22
  #puts w
@@ -0,0 +1,132 @@
1
+ # Author: Matthew D Moss
2
+ #
3
+ # Written for ruby quiz #25
4
+ #
5
+ class JapaneseTranslator
6
+ # My knowledge of counting in Japanese is limited, so this may not
7
+ # be entirely correct; in particular, I don't know what rules
8
+ # to follow after 'hyaku man' (1,000,000).
9
+ # I also combine a digit with its group, such as 'gohyaku' rather
10
+ # than 'go hyaku'; I just like reading it better that way.
11
+
12
+ DIGITS = %w(zero ichi ni san yon go roku nana hachi kyu)
13
+ GROUPS = %w(nothingtoseeheremovealong ju hyaku sen)
14
+ MAN = 10000
15
+
16
+ def to_spoken(val)
17
+ case val <=> 0
18
+ when -1
19
+ '- ' + to_spoken(-val)
20
+ when 0
21
+ DIGITS[0]
22
+ else
23
+ group(val, 0)
24
+ end
25
+ end
26
+
27
+ private
28
+
29
+ def group(val, level)
30
+ if val >= MAN
31
+ group(val / MAN, 0) + 'man ' + group(val % MAN, 0)
32
+ else
33
+ case val
34
+ when 0
35
+ ''
36
+ when 1
37
+ level == 0 ? DIGITS[val] : GROUPS[level]
38
+ when 2...10
39
+ DIGITS[val] + (GROUPS[level] if level > 0).to_s
40
+ else
41
+ group(val / 10, level+1) + ' ' + group(val % 10, level)
42
+ end
43
+ end
44
+ end
45
+ end
46
+
47
+
48
+ class USEnglishTranslator
49
+ # Formal, US English. Optional 'and'. Will not produce things
50
+ # such as 'twelve hundred' but rather 'one thousand two hundred'.
51
+ # The use of 'and' is incomplete; it is sometimes missed.
52
+
53
+ DIGITS = %w(zero one two three four five six seven eight nine)
54
+ TEENS = %w(ten eleven twelve thirteen fourteen fifteen sixteen
55
+ seventeen eighteen nineteen)
56
+ TENS = %w(hello world twenty thirty forty fifty sixty seventy
57
+ eighty ninety)
58
+ GROUPS = %w(thousand million billion trillion quadrillion
59
+ quintillion sextillion septillion octillion nonillion
60
+ decillion)
61
+ K = 1000
62
+
63
+ def initialize(conjunction = true)
64
+ @conjunction = conjunction
65
+ end
66
+
67
+ def to_spoken(val)
68
+ case val <=> 0
69
+ when -1
70
+ 'negative ' + to_spoken(-val)
71
+ when 0
72
+ DIGITS[0]
73
+ else
74
+ group(val, 0).flatten.join(' ')
75
+ end
76
+ end
77
+
78
+ private
79
+
80
+ def group(val, level)
81
+ x = group(val / K, level + 1) << GROUPS[level] if val >= K
82
+ x.to_a << under_1000(val % K, level)
83
+ end
84
+
85
+ def under_1000(val, level)
86
+ x = [DIGITS[val / 100]] << 'hundred' if val >= 100
87
+ x.to_a << under_100(val % 100, (level == 0 and not x.nil?))
88
+ end
89
+
90
+ def under_100(val, junction)
91
+ x = [('and' if @conjunction and junction)] # wyf?
92
+ case val
93
+ when 0
94
+ []
95
+ when 1...10
96
+ x << DIGITS[val]
97
+ when 10...20
98
+ x << TEENS[val - 10]
99
+ else
100
+ d = val % 10
101
+ x << (TENS[val / 10] + ('-' + DIGITS[d] if d != 0).to_s)
102
+ end
103
+ end
104
+ end
105
+
106
+
107
+ class Integer
108
+ def to_spoken(translator = USEnglishTranslator.new)
109
+ translator.to_spoken(self).squeeze(' ').strip
110
+ end
111
+ end
112
+
113
+ if $0 == __FILE__
114
+ SAMPLES = [ 0, 1, 2, 5, 10, 11, 14, 18, 20, 21, 29, 33, 42, 50, 87, 99,
115
+ 100, 101, 110, 167, 199, 200, 201, 276, 300, 314, 500, 610,
116
+ 1000, 1039, 1347, 2309, 3098, 23501, 32767, 70000, 5480283,
117
+ 2435489238, 234100090000, -42, -2001 ]
118
+
119
+ TRANSLATORS = { 'US English' => USEnglishTranslator.new,
120
+ 'Japanese' => JapaneseTranslator.new }
121
+
122
+
123
+ # main
124
+ TRANSLATORS.each do |lang, translator|
125
+ puts
126
+ puts lang
127
+ puts '-' * lang.length
128
+ SAMPLES.each do |val|
129
+ puts "%12d => %s" % [val, val.to_spoken(translator)]
130
+ end
131
+ end
132
+ end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.3
7
- date: 2005-10-25 00:00:00 +09:00
6
+ version: 0.1.4
7
+ date: 2005-11-01 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib
@@ -164,6 +164,7 @@ files:
164
164
  - lib/ferret/utils/string_helper.rb
165
165
  - lib/ferret/utils/number_tools.rb
166
166
  - lib/ferret/utils/date_tools.rb
167
+ - lib/ferret/utils/thread_local.rb
167
168
  - test/test_helper.rb
168
169
  - test/test_all.rb
169
170
  - test/unit/ts_document.rb
@@ -180,6 +181,7 @@ files:
180
181
  - test/unit/utils/tc_date_tools.rb
181
182
  - test/unit/utils/tc_parameter.rb
182
183
  - test/unit/utils/tc_weak_key_hash.rb
184
+ - test/unit/utils/tc_thread.rb
183
185
  - test/unit/analysis/tc_lower_case_tokenizer.rb
184
186
  - test/unit/analysis/tc_lower_case_filter.rb
185
187
  - test/unit/analysis/tc_porter_stem_filter.rb
@@ -230,6 +232,9 @@ files:
230
232
  - test/longrunning/tm_store.rb
231
233
  - test/benchmark/tb_rw_vint.rb
232
234
  - test/benchmark/tb_ram_store.rb
235
+ - test/functional/thread_safety_index_test.rb
236
+ - test/functional/thread_safety_test.rb
237
+ - test/utils/number_to_spoken.rb
233
238
  - test/unit/analysis/data/wordfile
234
239
  - rake_utils/code_statistics.rb
235
240
  test_files: []