ferret 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/TODO +3 -0
- data/ext/dummy.exe +0 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/token.rb +6 -0
- data/lib/ferret/analysis/tokenizers.rb +5 -5
- data/lib/ferret/document/document.rb +10 -13
- data/lib/ferret/index/compound_file_io.rb +12 -9
- data/lib/ferret/index/field_infos.rb +0 -6
- data/lib/ferret/index/index.rb +220 -102
- data/lib/ferret/index/index_reader.rb +22 -2
- data/lib/ferret/index/index_writer.rb +55 -14
- data/lib/ferret/index/multi_reader.rb +279 -279
- data/lib/ferret/index/segment_infos.rb +3 -3
- data/lib/ferret/index/segment_merger.rb +7 -6
- data/lib/ferret/index/segment_reader.rb +23 -7
- data/lib/ferret/index/segment_term_enum.rb +6 -7
- data/lib/ferret/index/term_buffer.rb +3 -5
- data/lib/ferret/index/term_doc_enum.rb +7 -2
- data/lib/ferret/index/term_infos_io.rb +15 -8
- data/lib/ferret/query_parser/query_parser.tab.rb +49 -45
- data/lib/ferret/search/boolean_query.rb +3 -4
- data/lib/ferret/search/boolean_scorer.rb +11 -11
- data/lib/ferret/search/caching_wrapper_filter.rb +1 -1
- data/lib/ferret/search/disjunction_sum_scorer.rb +9 -7
- data/lib/ferret/search/field_cache.rb +1 -2
- data/lib/ferret/search/field_sorted_hit_queue.rb +1 -1
- data/lib/ferret/search/fuzzy_term_enum.rb +64 -58
- data/lib/ferret/search/index_searcher.rb +16 -9
- data/lib/ferret/search/prefix_query.rb +7 -0
- data/lib/ferret/search/query_filter.rb +1 -1
- data/lib/ferret/search/term_scorer.rb +5 -1
- data/lib/ferret/search/top_docs.rb +12 -0
- data/lib/ferret/store/buffered_index_io.rb +5 -6
- data/lib/ferret/store/fs_store.rb +47 -33
- data/lib/ferret/store/ram_store.rb +2 -2
- data/lib/ferret/utils.rb +1 -0
- data/lib/ferret/utils/bit_vector.rb +20 -2
- data/lib/ferret/utils/thread_local.rb +28 -0
- data/lib/ferret/utils/weak_key_hash.rb +11 -2
- data/test/benchmark/tb_rw_vint.rb +1 -1
- data/test/functional/thread_safety_index_test.rb +81 -0
- data/test/functional/thread_safety_test.rb +137 -0
- data/test/test_all.rb +3 -7
- data/test/test_helper.rb +2 -1
- data/test/unit/index/tc_compound_file_io.rb +2 -2
- data/test/unit/index/tc_index.rb +128 -6
- data/test/unit/index/tc_index_reader.rb +1 -1
- data/test/unit/index/tc_segment_infos.rb +1 -1
- data/test/unit/index/th_doc.rb +1 -1
- data/test/unit/search/tc_index_searcher.rb +6 -0
- data/test/unit/store/tc_fs_store.rb +3 -3
- data/test/unit/utils/tc_bit_vector.rb +8 -0
- data/test/unit/utils/tc_thread.rb +61 -0
- data/test/unit/utils/tc_weak_key_hash.rb +2 -2
- data/test/utils/number_to_spoken.rb +132 -0
- metadata +7 -2
@@ -99,7 +99,7 @@ module Ferret::Store
|
|
99
99
|
|
100
100
|
def to_s
|
101
101
|
str = "The files in this directory are: \n"
|
102
|
-
@files.each do |path,file|
|
102
|
+
@files.each do |path, file|
|
103
103
|
str << path + " - " + file.size.to_s + "\n"
|
104
104
|
end
|
105
105
|
str
|
@@ -230,12 +230,12 @@ module Ferret::Store
|
|
230
230
|
attr_accessor :mtime
|
231
231
|
#attr_accessor :name
|
232
232
|
attr_accessor :length
|
233
|
+
alias :size :length
|
233
234
|
|
234
235
|
|
235
236
|
def initialize(name)
|
236
237
|
@buffers = Array.new
|
237
238
|
@mtime = Time.now
|
238
|
-
#@name = name
|
239
239
|
@length = 0
|
240
240
|
end
|
241
241
|
end
|
data/lib/ferret/utils.rb
CHANGED
@@ -74,7 +74,7 @@ module Ferret::Utils
|
|
74
74
|
def write(d, name)
|
75
75
|
output = d.create_output(name)
|
76
76
|
begin
|
77
|
-
output.
|
77
|
+
output.write_string(self.class.bignum_to_string(@bits))
|
78
78
|
ensure
|
79
79
|
output.close()
|
80
80
|
end
|
@@ -86,7 +86,7 @@ module Ferret::Utils
|
|
86
86
|
bv = BitVector.new
|
87
87
|
input = d.open_input(name)
|
88
88
|
begin
|
89
|
-
bv.bits = input.
|
89
|
+
bv.bits = string_to_bignum(input.read_string())
|
90
90
|
ensure
|
91
91
|
input.close()
|
92
92
|
end
|
@@ -101,5 +101,23 @@ module Ferret::Utils
|
|
101
101
|
end
|
102
102
|
puts ""
|
103
103
|
end
|
104
|
+
|
105
|
+
# Packs an integer into a byte string, least significant byte first.
#
# num:: the integer to encode; each pass emits its low 8 bits and then
#       shifts it right by 8
#
# Returns the packed String. Any value that is not greater than zero
# produces the empty string, because the loop body never runs.
def BitVector.bignum_to_string(num)
  bytes = []
  remaining = num
  until remaining <= 0
    bytes.push(remaining & 0xff)
    remaining = remaining >> 8
  end
  bytes.pack("C*")
end
|
114
|
+
|
115
|
+
# Rebuilds an integer from a byte string whose first byte is the least
# significant one.
#
# str:: the String to decode; it is unpacked as unsigned 8-bit values
#
# Returns the decoded integer, or 0 for an empty string.
def BitVector.string_to_bignum(str)
  # Walk from the last (most significant) byte down to the first, shifting
  # the accumulator up one byte before merging the next value in.
  str.unpack("C*").reverse.inject(0) do |acc, byte|
    (acc << 8) | byte
  end
end
|
104
122
|
end
|
105
123
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'thread'
|
2
|
+
# Extends Thread with a small per-thread key/value store. Values are indexed
# by the object_id of their key, and a finalizer registered on the key removes
# the entry once the key object has been garbage collected.
class Thread
  # Builds the finalizer proc that removes an entry by object id.
  #
  # It is created in its own method so that the closure can only see the
  # thread itself and never the key passed to set_local; a finalizer must not
  # hold a reference to the object it is attached to.
  def make_deleter
    lambda { |id| @ferret_cache.delete(id) }
  end

  # Set the local value for the thread.
  #
  # key::   any object that ObjectSpace.define_finalizer accepts
  # value:: the value to associate with +key+ on this thread
  #
  # Returns +value+.
  def set_local(key, value)
    @del ||= make_deleter
    @ferret_cache ||= {}
    id = key.object_id
    # Register the finalizer only when the key is first stored; re-setting a
    # key that is already present would otherwise attach the same proc again
    # on every call.
    ObjectSpace.define_finalizer(key, @del) unless @ferret_cache.key?(id)
    @ferret_cache[id] = value
  end

  # Get the local value for the thread, or nil when +key+ has no entry.
  def get_local(key)
    (@ferret_cache ||= {})[key.object_id]
  end

  # Returns the number of local variables stored. Useful for testing.
  def local_size
    (@ferret_cache ||= {}).size
  end

  # Removes every local value stored on this thread.
  def clear_local
    (@ferret_cache ||= {}).clear
  end
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
module Ferret::Utils
|
2
2
|
|
3
3
|
require 'weakref'
|
4
|
+
require 'monitor'
|
4
5
|
|
5
6
|
# This class implements a weak key hash. ie all keys that are stored in this
|
6
7
|
# hash can still be garbage collected, and if they are garbage collected
|
@@ -18,29 +19,37 @@ module Ferret::Utils
|
|
18
19
|
# # have been removed from the hash
|
19
20
|
# puts last_names["david"] #=>nil
|
20
21
|
#
|
21
|
-
#
|
22
|
+
# WeakKeyHash subclasses Monitor so it can be synchronized on.
|
23
|
+
#
|
24
|
+
# === NOTE
|
22
25
|
# Unfortunately the ruby garbage collector is not always predictable so your
|
23
26
|
# results may differ but each key should eventually be freed when all other
|
24
27
|
# references have been removed and the garbage collector is ready.
|
25
|
-
class WeakKeyHash
|
28
|
+
class WeakKeyHash < Monitor
|
29
|
+
# Create a new WeakKeyHash.
|
26
30
|
def initialize
|
31
|
+
super()
|
27
32
|
@hash = {}
|
28
33
|
@deleter = lambda{|id| @hash.delete(id)}
|
29
34
|
end
|
30
35
|
|
36
|
+
# Set the value for the key just like a Hash
|
31
37
|
def []=(key, value)
|
32
38
|
ObjectSpace.define_finalizer(key, @deleter)
|
33
39
|
@hash[key.object_id] = value
|
34
40
|
end
|
35
41
|
|
42
|
+
# Get the value for the key
|
36
43
|
def [](key)
|
37
44
|
return @hash[key.object_id]
|
38
45
|
end
|
39
46
|
|
47
|
+
# Return the number of elements in the Hash
|
40
48
|
def size
|
41
49
|
@hash.size
|
42
50
|
end
|
43
51
|
|
52
|
+
# Return a string representation of the WeakKeyHash
|
44
53
|
def to_s
|
45
54
|
buffer = ""
|
46
55
|
@hash.each_pair {|key, value| buffer << "<#{ObjectSpace._id2ref(key)}=>#{value}>"}
|
@@ -9,7 +9,7 @@ t = Time.new
|
|
9
9
|
10.times do
|
10
10
|
dpath = File.join(File.dirname(__FILE__),
|
11
11
|
'fsdir')
|
12
|
-
dir = Ferret::Store::FSDirectory.
|
12
|
+
dir = Ferret::Store::FSDirectory.new(dpath, true)
|
13
13
|
|
14
14
|
100.times do
|
15
15
|
ostream = dir.create_output("rw_vint.test")
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../test_helper"
|
2
|
+
require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
|
3
|
+
require 'thread'
|
4
|
+
|
5
|
+
# Functional stress test: several threads share one Index and each performs a
# random mix of optimize, delete, search and add operations on it.
class IndexThreadSafetyTest < Test::Unit::TestCase
  include Ferret::Index
  include Ferret::Document

  # NOTE(review): INDEX_DIR is not referenced anywhere in this class; setup
  # opens the relative path 'index2' instead. Confirm which one is intended.
  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
  # Number of random operations performed by each thread.
  ITERATIONS = 100000
  # Number of threads started by test_threading.
  NUM_THREADS = 2
  ANALYZER = Ferret::Analysis::Analyzer.new()

  # Creates a fresh index that all worker threads share.
  def setup
    @index = Index.new(:path => 'index2',
                       :create => true,
                       :analyzer => ANALYZER,
                       :default_field => 'contents')
  end

  # Closes the shared index so the test does not leave it open. @index is
  # reset to nil by a failing worker thread, hence the guard.
  def teardown
    @index.close if @index
  end

  # Body of one worker thread. Each iteration draws a random number and picks
  # an operation: > 0.98 optimize, > 0.9 delete, > 0.7 search, otherwise add.
  #
  # Any StandardError is printed with its backtrace, @index is set to nil and
  # the error is re-raised.
  def indexing_thread()
    ITERATIONS.times do
      choice = rand()

      if choice > 0.98
        do_optimize
      elsif choice > 0.9
        do_delete_doc
      elsif choice > 0.7
        do_search
      else
        do_add_doc
      end
    end
  rescue => e
    puts e
    puts e.backtrace
    @index = nil
    raise e
  end

  # Optimizes the shared index.
  def do_optimize
    puts "Optimizing the index"
    @index.optimize
  end

  # Deletes a randomly chosen document number; does nothing on an empty index.
  def do_delete_doc
    return if @index.size == 0
    doc_num = rand(@index.size)
    puts "Deleting #{doc_num} from index which has#{@index.has_deletions? ? "" : " no"} deletions"
    puts "document was already deleted" if (@index.deleted?(doc_num))
    @index.delete(doc_num)
  end

  # Adds a document for a random number: the number itself is stored in the
  # untokenized "id" field and its spoken form is indexed in "contents".
  def do_add_doc
    d = Document.new()
    n = rand(0xFFFFFFFF)
    d << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
    d << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
    puts("Adding #{n}")
    @index << d
  end

  # Searches for the spoken form of a random number and prints up to three
  # hits followed by the value returned by search_each.
  def do_search
    n = rand(0xFFFFFFFF)
    puts("Searching for #{n}")
    hits = @index.search_each(n.to_spoken, :num_docs => 3) do |d, s|
      puts "Hit for #{n}: #{@index[d]["id"]} - #{s}"
    end
    puts("Searched for #{n}: total = #{hits}")
  end

  # Starts NUM_THREADS worker threads and waits for all of them to finish.
  def test_threading
    threads = []
    NUM_THREADS.times do
      threads << Thread.new { indexing_thread }
    end

    threads.each {|t| t.join}
  end
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../test_helper"
|
2
|
+
require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
|
3
|
+
require 'thread'
|
4
|
+
|
5
|
+
# Stand-alone stress test that runs one indexing thread (unless :read_only is
# set) alongside three search threads against the index in INDEX_DIR.
#
# Options read from the hash passed to new:
# :read_only:: when true, no indexing thread is started
# :add::       when true, the writer is opened without recreating the index
class ThreadSafetyTest
  include Ferret::Index
  include Ferret::Search
  include Ferret::Store
  include Ferret::Document

  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
  ANALYZER = Ferret::Analysis::Analyzer.new()
  ITERATIONS = 19
  # Searcher shared by the search threads started with use_global = true.
  @@searcher = nil

  def initialize(options)
    @options = options
  end

  # Adds 400 * ITERATIONS random documents through +writer+, randomly
  # switching compound-file mode for each one, and closes and reopens the
  # writer every reopen_interval documents.
  #
  # Errors are printed with their backtrace and re-raised.
  def run_index_thread(writer)
    reopen_interval = 30 + rand(60)

    (400*ITERATIONS).times do |i|
      d = Document.new()
      n = rand(0xFFFFFFFF)
      d << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
      d << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
      puts("Adding #{n}")

      # Switch between single and multiple file segments
      writer.use_compound_file = (rand < 0.5)

      writer << d

      if (i % reopen_interval == 0)
        writer.close()
        writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER)
      end
    end

    writer.close()
  rescue => e
    puts e
    puts e.backtrace
    raise e
  end

  # Runs 50 * ITERATIONS random searches.
  #
  # use_global:: when true the shared @@searcher is used and periodically
  #              replaced; otherwise the thread opens, periodically reopens
  #              and finally closes its own IndexSearcher
  #
  # Errors are printed with their backtrace and re-raised.
  def run_search_thread(use_global)
    reopen_interval = 10 + rand(20)

    searcher = nil
    searcher = IndexSearcher.new(INDEX_DIR) unless use_global

    (50*ITERATIONS).times do |i|
      search_for(rand(0xFFFFFFFF), (searcher.nil? ? @@searcher : searcher))
      if (i % reopen_interval == 0)
        if searcher.nil?
          # The previous shared searcher is left open: other threads may
          # still be using it.
          @@searcher = IndexSearcher.new(INDEX_DIR)
        else
          searcher.close()
          # Forget the closed searcher first so that a failure while
          # reopening cannot lead to a second close in the ensure clause.
          searcher = nil
          searcher = IndexSearcher.new(INDEX_DIR)
        end
      end
    end
  rescue => e
    puts e
    puts e.backtrace
    raise e
  ensure
    # Release the thread-private searcher; the shared one is never closed here.
    searcher.close() if searcher
  end

  # Searches the "contents" field for the spoken form of +n+ using +searcher+
  # and prints the total and up to three hits.
  def search_for(n, searcher)
    puts("Searching for #{n}")
    query = Ferret::QueryParser.parse(n.to_spoken, "contents", :analyzer => ANALYZER)
    hits = searcher.search(query, :num_docs => 3)
    puts("Search for #{n}: total = #{hits.size}")
    hits.each do |d, s|
      puts "Hit for #{n}: #{searcher.reader.get_document(d)["id"]} - #{s}"
    end
  end

  # Starts the indexing thread (unless :read_only), one search thread with a
  # private searcher and two that share @@searcher, then waits for them all.
  def run_test_threads
    threads = []

    unless @options[:read_only]
      writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER,
                               :create => !@options[:add])

      threads << Thread.new { run_index_thread(writer) }

      # Pause before the search threads are started.
      sleep(1)
    end

    threads << Thread.new { run_search_thread(false)}

    @@searcher = IndexSearcher.new(INDEX_DIR)
    threads << Thread.new { run_search_thread(true)}

    threads << Thread.new { run_search_thread(true)}

    threads.each {|t| t.join}
  end
end
|
108
|
+
|
109
|
+
|
110
|
+
# Command-line entry point: parse the flags into OPTIONS and run the threads.
if $0 == __FILE__
  require 'optparse'

  # Defaults. The keys match the ones ThreadSafetyTest reads from its options
  # hash (:read_only and :add).
  OPTIONS = {
    :add       => false,
    :read_only => false,
  }

  ARGV.options do |opts|
    script_name = File.basename($0)
    opts.banner = "Usage: ruby #{script_name} [options]"

    opts.separator ""

    # Each flag sets its own option. Previously the two handlers were swapped
    # and -a wrote to :all, a key that ThreadSafetyTest never reads.
    opts.on("-r", "--read-only", "Read Only.") { OPTIONS[:read_only] = true }
    # --all is kept as an alias of --add for existing invocations.
    opts.on("-a", "--add", "--all",
            "Add to the existing index.") { OPTIONS[:add] = true }

    opts.separator ""

    opts.on("-h", "--help",
            "Show this help message.") { puts opts; exit }

    opts.parse!
  end

  tst = ThreadSafetyTest.new(OPTIONS)
  tst.run_test_threads
end
|
data/test/test_all.rb
CHANGED
@@ -1,9 +1,5 @@
|
|
1
1
|
$:.unshift File.dirname(__FILE__)
|
2
|
+
require 'test_helper.rb'
|
2
3
|
|
3
|
-
|
4
|
-
|
5
|
-
require 'unit/ts_index.rb'
|
6
|
-
require 'unit/ts_query_parser.rb'
|
7
|
-
require 'unit/ts_search.rb'
|
8
|
-
require 'unit/ts_store.rb'
|
9
|
-
require 'unit/ts_utils.rb'
|
4
|
+
load_test_dir("unit")
|
5
|
+
#load_test_dir("functional")
|
data/test/test_helper.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__)
|
1
2
|
$:.unshift File.join(File.dirname(__FILE__), '../lib')
|
2
3
|
$:.unshift File.join(File.dirname(__FILE__), '../ext')
|
3
4
|
|
4
5
|
require 'test/unit'
|
5
6
|
require 'ferret'
|
6
|
-
require '
|
7
|
+
require 'unit/index/th_doc'
|
7
8
|
|
8
9
|
def load_test_dir(dir)
|
9
10
|
dir = File.join(File.dirname(__FILE__), dir)
|
@@ -60,8 +60,8 @@ class CompoundFileReaderTest < Test::Unit::TestCase
|
|
60
60
|
cfile.close()
|
61
61
|
|
62
62
|
cfile_reader = CompoundFileReader.new(@dir, "cfile")
|
63
|
-
assert_equal(4, cfile_reader.
|
64
|
-
assert_equal(15, cfile_reader.
|
63
|
+
assert_equal(4, cfile_reader.length('file1'))
|
64
|
+
assert_equal(15, cfile_reader.length('file2'))
|
65
65
|
file1 = cfile_reader.open_input('file1')
|
66
66
|
file2 = cfile_reader.open_input('file2')
|
67
67
|
assert_equal(20, file1.read_int())
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -4,6 +4,7 @@ require File.dirname(__FILE__) + "/../../test_helper"
|
|
4
4
|
class IndexTest < Test::Unit::TestCase
|
5
5
|
include Ferret::Index
|
6
6
|
include Ferret::Analysis
|
7
|
+
include Ferret::Store
|
7
8
|
|
8
9
|
def setup()
|
9
10
|
@qp = Ferret::QueryParser.new()
|
@@ -134,12 +135,12 @@ class IndexTest < Test::Unit::TestCase
|
|
134
135
|
end
|
135
136
|
|
136
137
|
def test_fs_index
|
137
|
-
fs_path = File.join(File.dirname(__FILE__), '../../temp/fsdir')
|
138
|
-
`rm -rf #{fs_path}`
|
138
|
+
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
139
|
+
`rm -rf #{File.join(fs_path, "*")}`
|
139
140
|
assert_raise(Errno::ENOENT) {Index.new(:path => fs_path, :create_if_missing => false, :default_field => "def_field")}
|
140
141
|
index = Index.new(:path => fs_path, :default_field => "def_field")
|
141
142
|
do_test_index_with_array(index)
|
142
|
-
`rm -rf #{fs_path}`
|
143
|
+
`rm -rf #{File.join(fs_path, "*")}`
|
143
144
|
index = Index.new(:path => fs_path, :create => true, :default_field => "def_field")
|
144
145
|
do_test_index_with_hash(index)
|
145
146
|
index = Index.new(:path => fs_path, :create => true, :default_field => "def_field")
|
@@ -147,8 +148,8 @@ class IndexTest < Test::Unit::TestCase
|
|
147
148
|
end
|
148
149
|
|
149
150
|
def test_fs_index_is_persistant
|
150
|
-
fs_path = File.join(File.dirname(__FILE__), '../../temp/fsdir')
|
151
|
-
`rm -rf #{fs_path}`
|
151
|
+
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
152
|
+
`rm -rf #{File.join(fs_path, "*")}`
|
152
153
|
data = [
|
153
154
|
{"def_field" => "one two", :id => "me"},
|
154
155
|
{"def_field" => "one", :field2 => "three"},
|
@@ -163,8 +164,129 @@ class IndexTest < Test::Unit::TestCase
|
|
163
164
|
data.each {|doc| index << doc }
|
164
165
|
assert_equal(8, index.size)
|
165
166
|
index.close
|
166
|
-
index = Index.new(:path => fs_path, :
|
167
|
+
index = Index.new(:path => fs_path, :create_if_missing => false)
|
167
168
|
assert_equal(8, index.size)
|
168
169
|
assert_equal("four", index[5]["field3"])
|
169
170
|
end
|
171
|
+
|
172
|
+
# Checks Index#add_indexes with a single Index, an array of Index objects, a
# single directory and an array of directories, and that the merged documents
# remain readable after the source indexes and directories are closed.
def test_merging_indexes
  # Part 1: merge from Index objects.
  data = [
    {"f" => "zero"},
    {"f" => "one"},
    {"f" => "two"}
  ]
  index1 = Index.new(:default_field => "f")
  data.each {|doc| index1 << doc }
  data = [
    {"f" => "three"},
    {"f" => "four"},
    {"f" => "five"}
  ]
  index2 = Index.new(:default_field => "f")
  data.each {|doc| index2 << doc }
  data = [
    {"f" => "six"},
    {"f" => "seven"},
    {"f" => "eight"}
  ]
  index3 = Index.new(:default_field => "f")
  data.each {|doc| index3 << doc }

  index = Index.new(:default_field => "f")
  index.add_indexes(index1)
  assert_equal(3, index.size)
  assert_equal("zero", index[0]["f"])
  index.add_indexes([index2, index3])
  assert_equal(9, index.size)
  assert_equal("zero", index[0]["f"])
  assert_equal("eight", index[8]["f"])
  index1.close
  index2.close
  index3.close
  # The merged documents must not depend on the closed source indexes.
  assert_equal("seven", index[7]["f"])

  # Part 2: merge from RAMDirectory objects. Each source index is flushed
  # before its directory is handed to add_indexes.
  data = [
    {"f" => "alpha"},
    {"f" => "beta"},
    {"f" => "charlie"}
  ]
  dir1 = RAMDirectory.new
  index1 = Index.new(:dir => dir1, :default_field => "f")
  data.each {|doc| index1 << doc }
  index1.flush
  data = [
    {"f" => "delta"},
    {"f" => "echo"},
    {"f" => "foxtrot"}
  ]
  dir2 = RAMDirectory.new
  index2 = Index.new(:dir => dir2, :default_field => "f")
  data.each {|doc| index2 << doc }
  index2.flush
  data = [
    {"f" => "golf"},
    {"f" => "india"},
    {"f" => "juliet"}
  ]
  dir3 = RAMDirectory.new
  index3 = Index.new(:dir => dir3, :default_field => "f")
  data.each {|doc| index3 << doc }
  index3.flush

  index.add_indexes(dir1)
  assert_equal(12, index.size)
  assert_equal("alpha", index[9]["f"])
  index.add_indexes([dir2, dir3])
  assert_equal(18, index.size)
  assert_equal("juliet", index[17]["f"])
  index1.close
  dir1.close
  index2.close
  dir2.close
  index3.close
  dir3.close
  assert_equal("golf", index[15]["f"])

  # Close the merged index as well; every other index above is closed.
  index.close
end
|
249
|
+
|
250
|
+
# Checks Index#persist in two forms: with a path plus a second argument of
# true, and with an FSDirectory opened on that same path. After each persist
# the index is closed and reopened from the path to confirm its contents.
def test_persist_index
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))

  # First round: three documents persisted to fs_path.
  first = Index.new(:default_field => "f")
  ["zero", "one", "two"].each do |word|
    first << {"f" => word}
  end
  first.persist(fs_path, true)
  assert_equal(3, first.size)
  assert_equal("zero", first[0]["f"])
  first.close

  reopened = Index.new(:path => fs_path)
  assert_equal(3, reopened.size)
  assert_equal("zero", reopened[0]["f"])
  reopened.close

  # Second round: three more documents persisted into the existing directory;
  # the assertions expect them to follow the documents already stored there.
  second = Index.new(:default_field => "f")
  ["romeo", "sierra", "tango"].each do |word|
    second << {"f" => word}
  end
  assert_equal(3, second.size)
  assert_equal("romeo", second[0]["f"])
  fs_dir = FSDirectory.new(fs_path, false)
  second.persist(fs_dir)
  assert_equal(6, second.size)
  assert_equal("zero", second[0]["f"])
  assert_equal("romeo", second[3]["f"])
  second.close

  reopened = Index.new(:path => fs_path)
  assert_equal(6, reopened.size)
  assert_equal("zero", reopened[0]["f"])
  assert_equal("romeo", reopened[3]["f"])
  reopened.close
end
|
170
292
|
end
|