ferret 0.10.1 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +7 -1
- data/ext/analysis.c +21 -13
- data/ext/array.c +1 -1
- data/ext/bitvector.c +2 -2
- data/ext/defines.h +0 -6
- data/ext/except.c +6 -6
- data/ext/except.h +12 -8
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +4 -0
- data/ext/ferret.h +1 -0
- data/ext/fs_store.c +18 -4
- data/ext/global.c +18 -16
- data/ext/global.h +7 -2
- data/ext/hash.c +1 -1
- data/ext/helper.c +1 -1
- data/ext/helper.h +1 -1
- data/ext/inc/lang.h +7 -1
- data/ext/ind.c +4 -4
- data/ext/ind.h +3 -3
- data/ext/index.c +33 -26
- data/ext/index.h +1 -1
- data/ext/lang.h +7 -1
- data/ext/mem_pool.c +1 -1
- data/ext/mem_pool.h +1 -1
- data/ext/q_fuzzy.c +2 -2
- data/ext/q_match_all.c +2 -2
- data/ext/q_multi_term.c +1 -1
- data/ext/q_parser.c +60 -52
- data/ext/r_analysis.c +6 -4
- data/ext/r_index.c +57 -4
- data/ext/r_search.c +1 -1
- data/ext/r_utils.c +1 -1
- data/ext/ram_store.c +1 -1
- data/ext/search.c +4 -4
- data/ext/sort.c +3 -3
- data/ext/store.c +9 -9
- data/ext/store.h +4 -4
- data/ext/tags +7841 -0
- data/ext/term_vectors.c +3 -3
- data/lib/ferret/index.rb +69 -7
- data/test/test_helper.rb +3 -2
- data/test/unit/analysis/tc_token_stream.rb +1 -0
- data/test/unit/index/tc_index.rb +157 -2
- data/test/unit/index/tc_index_reader.rb +108 -5
- data/test/unit/query_parser/tc_query_parser.rb +2 -1
- data/test/unit/search/tc_index_searcher.rb +1 -1
- data/test/unit/search/tc_multi_searcher.rb +2 -1
- data/test/unit/search/tc_spans.rb +1 -1
- data/test/unit/store/tc_fs_store.rb +6 -3
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_number_tools.rb +1 -1
- metadata +138 -137
data/ext/term_vectors.c
CHANGED
@@ -250,10 +250,10 @@ TermVectorsWriter *tvw_open(Store *store, const char *segment, FieldInfos *fis)
|
|
250
250
|
tvw->fis = fis;
|
251
251
|
tvw->fields = ary_new_type_capa(TVField, TV_FIELD_INIT_CAPA);
|
252
252
|
|
253
|
-
|
253
|
+
snprintf(file_name, SEGMENT_NAME_MAX_LENGTH, "%s.tvx", segment);
|
254
254
|
tvw->tvx_out = store->new_output(store, file_name);
|
255
255
|
|
256
|
-
|
256
|
+
snprintf(file_name, SEGMENT_NAME_MAX_LENGTH, "%s.tvd", segment);
|
257
257
|
tvw->tvd_out = store->new_output(store, file_name);
|
258
258
|
|
259
259
|
return tvw;
|
@@ -335,7 +335,7 @@ void tvw_add_postings(TermVectorsWriter *tvw,
|
|
335
335
|
|
336
336
|
if (fi_store_offsets(fi)) {
|
337
337
|
/* use delta encoding for offsets */
|
338
|
-
int last_end = 0;
|
338
|
+
int last_end = 0;
|
339
339
|
os_write_vint(tvd_out, offset_count); /* write shared prefix length */
|
340
340
|
for (i = 0; i < offset_count; i++) {
|
341
341
|
int start = offsets[i].start;
|
data/lib/ferret/index.rb
CHANGED
@@ -10,6 +10,7 @@ module Ferret::Index
|
|
10
10
|
include Ferret::Search
|
11
11
|
|
12
12
|
attr_reader :options
|
13
|
+
|
13
14
|
# If you create an Index without any options, it'll simply create an index
|
14
15
|
# in memory. But this class is highly configurable and every option that
|
15
16
|
# you can supply to IndexWriter and QueryParser, you can also set here.
|
@@ -52,6 +53,10 @@ module Ferret::Index
|
|
52
53
|
# concerned about performance. In that case you
|
53
54
|
# should think about setting up a DRb indexing
|
54
55
|
# service.
|
56
|
+
# lock_retry_time:: Default: 2 seconds. This parameter specifies how
|
57
|
+
# long to wait before retrying to obtain the
|
58
|
+
# commit lock when detecting if the IndexReader is
|
59
|
+
# at the latest version.
|
55
60
|
#
|
56
61
|
# Some examples;
|
57
62
|
#
|
@@ -64,8 +69,14 @@ module Ferret::Index
|
|
64
69
|
# index = Index::Index.new(:dir => directory,
|
65
70
|
# :default_slop => 2,
|
66
71
|
# :handle_parse_errors => false)
|
67
|
-
#
|
68
|
-
|
72
|
+
#
|
73
|
+
# You can also pass a block if you like. The index will be yielded and
|
74
|
+
# closed at the end of the block. For example;
|
75
|
+
#
|
76
|
+
# Ferret::I.new() do |index|
|
77
|
+
# # do stuff with index. Most of your actions will be cached.
|
78
|
+
# end
|
79
|
+
def initialize(options = {}, &block)
|
69
80
|
super()
|
70
81
|
|
71
82
|
if options[:key]
|
@@ -92,14 +103,19 @@ module Ferret::Index
|
|
92
103
|
end
|
93
104
|
|
94
105
|
options[:dir] = @dir
|
106
|
+
options[:lock_retry_time]||= 2
|
95
107
|
@dir.extend(MonitorMixin)
|
96
108
|
@dir.synchronize do
|
97
109
|
@options = options
|
98
|
-
|
99
|
-
|
100
|
-
|
110
|
+
if (!@dir.exists?("segments")) || options[:create]
|
111
|
+
IndexWriter.new(options).close
|
112
|
+
end
|
113
|
+
options[:analyzer]||= Ferret::Analysis::StandardAnalyzer.new
|
114
|
+
|
115
|
+
@searcher = nil
|
101
116
|
@writer = nil
|
102
117
|
@reader = nil
|
118
|
+
|
103
119
|
@options.delete(:create) # only want to create the first time if at all
|
104
120
|
@auto_flush = @options[:auto_flush] || false
|
105
121
|
if (@options[:id_field].nil? and
|
@@ -117,13 +133,51 @@ module Ferret::Index
|
|
117
133
|
@open = true
|
118
134
|
@qp = nil
|
119
135
|
end
|
136
|
+
if block
|
137
|
+
yield self
|
138
|
+
self.close
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
# Returns an array of strings with the matches highlighted. The +query+ can
|
143
|
+
# be either a query String or a Ferret::Search::Query object. The doc_id is
|
144
|
+
# the id of the document you want to highlight (usually returned by the
|
145
|
+
# search methods). There are also a number of options you can pass;
|
146
|
+
#
|
147
|
+
# === Options
|
148
|
+
#
|
149
|
+
# :field:: Default: @options[:default_field]. The default_field
|
150
|
+
# is the field that is usually highlighted but you can
|
151
|
+
# specify which field you want to highlight here. If
|
152
|
+
# you want to highlight multiple fields then you will
|
153
|
+
# need to call this method multiple times.
|
154
|
+
# :excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
|
155
|
+
# terms will be in the centre of the excerpt.
|
156
|
+
# :num_excerpts:: Default: 2. Number of excerpts to return.
|
157
|
+
# :pre_tag:: Default: "<b>". Tag to place to the left of the
|
158
|
+
# match. You'll probably want to change this to a
|
159
|
+
# "<span>" tag with a class. Try "\033[7m" for use in a
|
160
|
+
# terminal.
|
161
|
+
# :post_tag:: Default: "</b>". This tag should close the
|
162
|
+
# +:pre_tag+. Try tag "\033[m" in the terminal.
|
163
|
+
# :ellipsis:: Default: "...". This is the string that is appended
|
164
|
+
# at the beginning and end of excerpts (unless the
|
165
|
+
# excerpt hits the start or end of the field). You'll
|
166
|
+
# probably want to change this to a Unicode ellipsis
|
167
|
+
# character.
|
168
|
+
def highlight(query, doc_id, options = {})
|
169
|
+
ensure_searcher_open()
|
170
|
+
@searcher.highlight(process_query(query),
|
171
|
+
doc_id,
|
172
|
+
options[:field]||@options[:default_field],
|
173
|
+
options)
|
120
174
|
end
|
121
175
|
|
122
176
|
# Closes this index by closing its associated reader and writer objects.
|
123
177
|
def close
|
124
178
|
@dir.synchronize do
|
125
179
|
if not @open
|
126
|
-
raise "tried to close an already closed directory"
|
180
|
+
raise(StandardError, "tried to close an already closed directory")
|
127
181
|
end
|
128
182
|
@searcher.close() if @searcher
|
129
183
|
@reader.close() if @reader
|
@@ -534,7 +588,15 @@ module Ferret::Index
|
|
534
588
|
def ensure_reader_open()
|
535
589
|
raise "tried to use a closed index" if not @open
|
536
590
|
if @reader
|
537
|
-
|
591
|
+
latest = false
|
592
|
+
begin
|
593
|
+
latest = @reader.latest?
|
594
|
+
rescue LockException => le
|
595
|
+
sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
|
596
|
+
latest = @reader.latest?
|
597
|
+
end
|
598
|
+
if not latest
|
599
|
+
@reader.close
|
538
600
|
return @reader = IndexReader.new(@dir)
|
539
601
|
end
|
540
602
|
else
|
data/test/test_helper.rb
CHANGED
@@ -3,9 +3,10 @@ $:.unshift File.join(File.dirname(__FILE__), '../lib')
|
|
3
3
|
$:.unshift File.join(File.dirname(__FILE__), '../ext')
|
4
4
|
|
5
5
|
class Float
|
6
|
-
def
|
7
|
-
return (1 - self/o).abs < 0.
|
6
|
+
def approx_eql?(o)
|
7
|
+
return (1 - self/o).abs < 0.0001
|
8
8
|
end
|
9
|
+
alias :=~ :approx_eql?
|
9
10
|
end
|
10
11
|
|
11
12
|
require 'test/unit'
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -9,7 +9,7 @@ class IndexTest < Test::Unit::TestCase
|
|
9
9
|
def setup()
|
10
10
|
end
|
11
11
|
|
12
|
-
def
|
12
|
+
def teardown()
|
13
13
|
end
|
14
14
|
|
15
15
|
def check_results(index, query, expected)
|
@@ -345,9 +345,10 @@ class IndexTest < Test::Unit::TestCase
|
|
345
345
|
assert_equal(2, index2.size)
|
346
346
|
assert_equal(2, index.size)
|
347
347
|
top_docs = index.search("content3")
|
348
|
+
|
348
349
|
assert_equal(0, top_docs.hits.size)
|
349
350
|
|
350
|
-
iw = IndexWriter.new(:path => fs_path, :analyzer => WhiteSpaceAnalyzer.new
|
351
|
+
iw = IndexWriter.new(:path => fs_path, :analyzer => WhiteSpaceAnalyzer.new)
|
351
352
|
iw << {:f, "content3"}
|
352
353
|
iw.close()
|
353
354
|
|
@@ -355,6 +356,7 @@ class IndexTest < Test::Unit::TestCase
|
|
355
356
|
assert_equal(1, top_docs.hits.size)
|
356
357
|
assert_equal(3, index.size)
|
357
358
|
assert_equal("content3", index[2][:f])
|
359
|
+
index2.close
|
358
360
|
index.close
|
359
361
|
end
|
360
362
|
|
@@ -556,6 +558,7 @@ class IndexTest < Test::Unit::TestCase
|
|
556
558
|
|
557
559
|
data = %q(one two three four five six seven eight nine ten eleven twelve)
|
558
560
|
index1 = Index.new(:path => fs_path, :auto_flush => true, :key => :id)
|
561
|
+
index1 << "zero"
|
559
562
|
index2 = Index.new(:path => fs_path, :auto_flush => true)
|
560
563
|
begin
|
561
564
|
data.each do |datum|
|
@@ -611,4 +614,156 @@ class IndexTest < Test::Unit::TestCase
|
|
611
614
|
hits = i.search 'move or shake'
|
612
615
|
assert_equal 1, hits.total_hits # fails when id field is present
|
613
616
|
end
|
617
|
+
|
618
|
+
def test_threading
|
619
|
+
path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
620
|
+
index = Ferret::Index::Index.new(:path => path, :create => true)
|
621
|
+
|
622
|
+
100.times do |i|
|
623
|
+
buf = ''
|
624
|
+
doc = {}
|
625
|
+
doc[:id] = i
|
626
|
+
doc[:foo] = "foo #{i}"
|
627
|
+
index << doc
|
628
|
+
end
|
629
|
+
|
630
|
+
threads = []
|
631
|
+
|
632
|
+
4.times do
|
633
|
+
threads << Thread.new(index) do |index|
|
634
|
+
result = index.search('id:42')
|
635
|
+
assert_equal(1, result.total_hits)
|
636
|
+
end
|
637
|
+
end
|
638
|
+
|
639
|
+
threads.each{|t| t.join }
|
640
|
+
end
|
641
|
+
|
642
|
+
def test_wildcard
|
643
|
+
i = nil
|
644
|
+
Ferret::I.new do |i|
|
645
|
+
i << "one"
|
646
|
+
assert_equal(1, i.search("*").total_hits)
|
647
|
+
i << "two"
|
648
|
+
assert_equal(2, i.search("*").total_hits)
|
649
|
+
i << {:content => "three"}
|
650
|
+
assert_equal(3, i.search("*").total_hits)
|
651
|
+
assert_equal(3, i.search("id:*").total_hits)
|
652
|
+
assert_equal(2, i.search('id:?*').total_hits)
|
653
|
+
end
|
654
|
+
assert_raise(StandardError) {i.close}
|
655
|
+
end
|
656
|
+
|
657
|
+
def test_highlighter()
|
658
|
+
index = Ferret::I.new(:default_field => :field,
|
659
|
+
:default_input_field => :field,
|
660
|
+
:analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new)
|
661
|
+
[
|
662
|
+
"the words we are searching for are one and two also " +
|
663
|
+
"sometimes looking for them as a phrase like this; one " +
|
664
|
+
"two lets see how it goes"
|
665
|
+
].each {|doc| index << doc }
|
666
|
+
|
667
|
+
highlights = index.highlight("one", 0,
|
668
|
+
:excerpt_length => 10,
|
669
|
+
:num_excerpts => 1)
|
670
|
+
|
671
|
+
assert_equal(1, highlights.size)
|
672
|
+
assert_equal("...are <b>one</b>...", highlights[0])
|
673
|
+
|
674
|
+
highlights = index.highlight("one", 0,
|
675
|
+
:excerpt_length => 10,
|
676
|
+
:num_excerpts => 2)
|
677
|
+
assert_equal(2, highlights.size)
|
678
|
+
assert_equal("...are <b>one</b>...", highlights[0])
|
679
|
+
assert_equal("...this; <b>one</b>...", highlights[1])
|
680
|
+
|
681
|
+
highlights = index.highlight("one", 0,
|
682
|
+
:excerpt_length => 10,
|
683
|
+
:num_excerpts => 3)
|
684
|
+
assert_equal(3, highlights.size)
|
685
|
+
assert_equal("the words...", highlights[0])
|
686
|
+
assert_equal("...are <b>one</b>...", highlights[1])
|
687
|
+
assert_equal("...this; <b>one</b>...", highlights[2])
|
688
|
+
|
689
|
+
highlights = index.highlight("one", 0,
|
690
|
+
:excerpt_length => 10,
|
691
|
+
:num_excerpts => 4)
|
692
|
+
assert_equal(3, highlights.size)
|
693
|
+
assert_equal("the words we are...", highlights[0])
|
694
|
+
assert_equal("...are <b>one</b>...", highlights[1])
|
695
|
+
assert_equal("...this; <b>one</b>...", highlights[2])
|
696
|
+
|
697
|
+
highlights = index.highlight("one", 0,
|
698
|
+
:excerpt_length => 10,
|
699
|
+
:num_excerpts => 5)
|
700
|
+
assert_equal(2, highlights.size)
|
701
|
+
assert_equal("the words we are searching for are <b>one</b>...", highlights[0])
|
702
|
+
assert_equal("...this; <b>one</b>...", highlights[1])
|
703
|
+
|
704
|
+
highlights = index.highlight("one", 0,
|
705
|
+
:excerpt_length => 10,
|
706
|
+
:num_excerpts => 20)
|
707
|
+
assert_equal(1, highlights.size)
|
708
|
+
assert_equal("the words we are searching for are <b>one</b> and two also " +
|
709
|
+
"sometimes looking for them as a phrase like this; <b>one</b> " +
|
710
|
+
"two lets see how it goes", highlights[0])
|
711
|
+
|
712
|
+
highlights = index.highlight("one", 0,
|
713
|
+
:excerpt_length => 1000,
|
714
|
+
:num_excerpts => 1)
|
715
|
+
assert_equal(1, highlights.size)
|
716
|
+
assert_equal("the words we are searching for are <b>one</b> and two also " +
|
717
|
+
"sometimes looking for them as a phrase like this; <b>one</b> " +
|
718
|
+
"two lets see how it goes", highlights[0])
|
719
|
+
|
720
|
+
highlights = index.highlight("(one two)", 0,
|
721
|
+
:excerpt_length => 15,
|
722
|
+
:num_excerpts => 2)
|
723
|
+
assert_equal(2, highlights.size)
|
724
|
+
assert_equal("...<b>one</b> and <b>two</b>...", highlights[0])
|
725
|
+
assert_equal("...this; <b>one</b> <b>two</b>...", highlights[1])
|
726
|
+
|
727
|
+
highlights = index.highlight('one two "one two"', 0,
|
728
|
+
:excerpt_length => 15,
|
729
|
+
:num_excerpts => 2)
|
730
|
+
assert_equal(2, highlights.size)
|
731
|
+
assert_equal("...<b>one</b> and <b>two</b>...", highlights[0])
|
732
|
+
assert_equal("...this; <b>one two</b>...", highlights[1])
|
733
|
+
|
734
|
+
highlights = index.highlight('"one two"', 0,
|
735
|
+
:excerpt_length => 15,
|
736
|
+
:num_excerpts => 1)
|
737
|
+
assert_equal(1, highlights.size)
|
738
|
+
# should have a higher priority since it is the merger of three matches
|
739
|
+
assert_equal("...this; <b>one two</b>...", highlights[0])
|
740
|
+
|
741
|
+
highlights = index.highlight('"one two"', 0, :field => :not_a_field,
|
742
|
+
:excerpt_length => 15,
|
743
|
+
:num_excerpts => 1)
|
744
|
+
assert_nil(highlights)
|
745
|
+
|
746
|
+
highlights = index.highlight("wrong_field:one", 0, :field => :wrong_field,
|
747
|
+
:excerpt_length => 15,
|
748
|
+
:num_excerpts => 1)
|
749
|
+
assert_nil(highlights)
|
750
|
+
|
751
|
+
highlights = index.highlight('"the words" "for are one and two" ' +
|
752
|
+
'words one two', 0,
|
753
|
+
:excerpt_length => 10,
|
754
|
+
:num_excerpts => 1)
|
755
|
+
assert_equal(1, highlights.size)
|
756
|
+
assert_equal("...<b>for are one and two</b>...", highlights[0])
|
757
|
+
|
758
|
+
highlights = index.highlight('"the words" "for are one and two" ' +
|
759
|
+
'words one two', 0,
|
760
|
+
:excerpt_length => 10,
|
761
|
+
:num_excerpts => 2)
|
762
|
+
assert_equal(2, highlights.size)
|
763
|
+
assert_equal("<b>the words</b>...", highlights[0])
|
764
|
+
assert_equal("...<b>for are one and two</b>...", highlights[1])
|
765
|
+
|
766
|
+
|
767
|
+
index.close
|
768
|
+
end
|
614
769
|
end
|
@@ -63,6 +63,13 @@ module IndexReaderCommon
|
|
63
63
|
assert_equal(1, te.doc_freq)
|
64
64
|
assert(!te.next?)
|
65
65
|
|
66
|
+
expected = %w{is 1 more 1 not 1 skip 42 stored 1 text 1 which 1}
|
67
|
+
te = @ir.terms(:text)
|
68
|
+
te.each do |term, doc_freq|
|
69
|
+
assert_equal(expected.shift, term)
|
70
|
+
assert_equal(expected.shift.to_i, doc_freq)
|
71
|
+
end
|
72
|
+
|
66
73
|
te = @ir.terms_from(:body, "Not")
|
67
74
|
assert_equal("Not", te.term)
|
68
75
|
assert_equal(1, te.doc_freq)
|
@@ -177,7 +184,7 @@ module IndexReaderCommon
|
|
177
184
|
|
178
185
|
def do_test_get_doc()
|
179
186
|
doc = @ir.get_document(3)
|
180
|
-
|
187
|
+
[:author, :body, :title, :year].each {|fn| assert(doc.fields.include?(fn))}
|
181
188
|
assert_equal(4, doc.fields.size)
|
182
189
|
assert_equal(0, doc.size)
|
183
190
|
assert_equal([], doc.keys)
|
@@ -296,6 +303,7 @@ module IndexReaderCommon
|
|
296
303
|
assert_equal(doc_count, ir2.max_doc())
|
297
304
|
assert_equal(doc_count, ir2.num_docs())
|
298
305
|
|
306
|
+
ir2.close
|
299
307
|
ir2 = ir_new()
|
300
308
|
assert(ir2.has_deletions?())
|
301
309
|
assert_equal(doc_count, ir2.max_doc())
|
@@ -325,6 +333,7 @@ module IndexReaderCommon
|
|
325
333
|
assert_equal(doc_count - 6, ir3.max_doc())
|
326
334
|
assert_equal(doc_count - 6, ir3.num_docs())
|
327
335
|
|
336
|
+
ir2.close()
|
328
337
|
ir3.close()
|
329
338
|
end
|
330
339
|
end
|
@@ -358,7 +367,7 @@ class MultiReaderTest < Test::Unit::TestCase
|
|
358
367
|
@ir = ir_new()
|
359
368
|
end
|
360
369
|
|
361
|
-
def
|
370
|
+
def teardown()
|
362
371
|
@ir.close()
|
363
372
|
@dir.close()
|
364
373
|
end
|
@@ -406,9 +415,102 @@ class MultiExternalReaderTest < Test::Unit::TestCase
|
|
406
415
|
@ir = ir_new
|
407
416
|
end
|
408
417
|
|
409
|
-
def
|
418
|
+
def teardown()
|
419
|
+
@ir.close()
|
420
|
+
@dirs.each {|dir| dir.close}
|
421
|
+
end
|
422
|
+
end
|
423
|
+
|
424
|
+
class MultiExternalReaderDirTest < Test::Unit::TestCase
|
425
|
+
include IndexReaderCommon
|
426
|
+
|
427
|
+
def ir_new
|
428
|
+
IndexReader.new(@dirs)
|
429
|
+
end
|
430
|
+
|
431
|
+
def iw_optimize
|
432
|
+
@dirs.each do |dir|
|
433
|
+
iw = IndexWriter.new(:dir => dir, :analyzer => WhiteSpaceAnalyzer.new())
|
434
|
+
iw.optimize()
|
435
|
+
iw.close()
|
436
|
+
end
|
437
|
+
end
|
438
|
+
|
439
|
+
def setup()
|
440
|
+
@dirs = []
|
441
|
+
|
442
|
+
[
|
443
|
+
[0, 10],
|
444
|
+
[10, 30],
|
445
|
+
[30, IndexTestHelper::INDEX_TEST_DOCS.size]
|
446
|
+
].each do |start, finish|
|
447
|
+
dir = Ferret::Store::RAMDirectory.new()
|
448
|
+
@dirs << dir
|
449
|
+
|
450
|
+
iw = IndexWriter.new(:dir => dir,
|
451
|
+
:analyzer => WhiteSpaceAnalyzer.new(),
|
452
|
+
:create => true,
|
453
|
+
:field_infos => IndexTestHelper::INDEX_TEST_FIS)
|
454
|
+
(start...finish).each do |doc_id|
|
455
|
+
iw << IndexTestHelper::INDEX_TEST_DOCS[doc_id]
|
456
|
+
end
|
457
|
+
iw.close()
|
458
|
+
end
|
459
|
+
@ir = ir_new
|
460
|
+
end
|
461
|
+
|
462
|
+
def teardown()
|
463
|
+
@ir.close()
|
464
|
+
@dirs.each {|dir| dir.close}
|
465
|
+
end
|
466
|
+
end
|
467
|
+
|
468
|
+
class MultiExternalReaderPathTest < Test::Unit::TestCase
|
469
|
+
include IndexReaderCommon
|
470
|
+
|
471
|
+
def ir_new
|
472
|
+
IndexReader.new(@paths)
|
473
|
+
end
|
474
|
+
|
475
|
+
def iw_optimize
|
476
|
+
@paths.each do |path|
|
477
|
+
iw = IndexWriter.new(:path => path, :analyzer => WhiteSpaceAnalyzer.new())
|
478
|
+
iw.optimize()
|
479
|
+
iw.close()
|
480
|
+
end
|
481
|
+
end
|
482
|
+
|
483
|
+
def setup()
|
484
|
+
base_dir = File.expand_path(File.join(File.dirname(__FILE__),
|
485
|
+
'../../temp/multidir'))
|
486
|
+
FileUtils.mkdir_p(base_dir)
|
487
|
+
@paths = [
|
488
|
+
File.join(base_dir, "i1"),
|
489
|
+
File.join(base_dir, "i2"),
|
490
|
+
File.join(base_dir, "i3")
|
491
|
+
]
|
492
|
+
|
493
|
+
[
|
494
|
+
[0, 10],
|
495
|
+
[10, 30],
|
496
|
+
[30, IndexTestHelper::INDEX_TEST_DOCS.size]
|
497
|
+
].each_with_index do |(start, finish), i|
|
498
|
+
path = @paths[i]
|
499
|
+
|
500
|
+
iw = IndexWriter.new(:path => path,
|
501
|
+
:analyzer => WhiteSpaceAnalyzer.new(),
|
502
|
+
:create => true,
|
503
|
+
:field_infos => IndexTestHelper::INDEX_TEST_FIS)
|
504
|
+
(start...finish).each do |doc_id|
|
505
|
+
iw << IndexTestHelper::INDEX_TEST_DOCS[doc_id]
|
506
|
+
end
|
507
|
+
iw.close()
|
508
|
+
end
|
509
|
+
@ir = ir_new
|
510
|
+
end
|
511
|
+
|
512
|
+
def teardown()
|
410
513
|
@ir.close()
|
411
|
-
@dir.close()
|
412
514
|
end
|
413
515
|
end
|
414
516
|
|
@@ -420,7 +522,7 @@ class IndexReaderTest < Test::Unit::TestCase
|
|
420
522
|
@dir = Ferret::Store::RAMDirectory.new()
|
421
523
|
end
|
422
524
|
|
423
|
-
def
|
525
|
+
def teardown()
|
424
526
|
@dir.close()
|
425
527
|
end
|
426
528
|
|
@@ -445,6 +547,7 @@ class IndexReaderTest < Test::Unit::TestCase
|
|
445
547
|
@dir = Ferret::Store::RAMDirectory.new(@fs_dir)
|
446
548
|
ir = IndexReader.new(@dir)
|
447
549
|
assert_equal(doc, ir.get_document(0).load)
|
550
|
+
ir.close
|
448
551
|
end
|
449
552
|
|
450
553
|
def do_test_term_vectors(ir)
|