ferret 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. data/Rakefile +1 -1
  2. data/TODO +3 -0
  3. data/ext/dummy.exe +0 -0
  4. data/lib/ferret.rb +1 -1
  5. data/lib/ferret/analysis/token.rb +6 -0
  6. data/lib/ferret/analysis/tokenizers.rb +5 -5
  7. data/lib/ferret/document/document.rb +10 -13
  8. data/lib/ferret/index/compound_file_io.rb +12 -9
  9. data/lib/ferret/index/field_infos.rb +0 -6
  10. data/lib/ferret/index/index.rb +220 -102
  11. data/lib/ferret/index/index_reader.rb +22 -2
  12. data/lib/ferret/index/index_writer.rb +55 -14
  13. data/lib/ferret/index/multi_reader.rb +279 -279
  14. data/lib/ferret/index/segment_infos.rb +3 -3
  15. data/lib/ferret/index/segment_merger.rb +7 -6
  16. data/lib/ferret/index/segment_reader.rb +23 -7
  17. data/lib/ferret/index/segment_term_enum.rb +6 -7
  18. data/lib/ferret/index/term_buffer.rb +3 -5
  19. data/lib/ferret/index/term_doc_enum.rb +7 -2
  20. data/lib/ferret/index/term_infos_io.rb +15 -8
  21. data/lib/ferret/query_parser/query_parser.tab.rb +49 -45
  22. data/lib/ferret/search/boolean_query.rb +3 -4
  23. data/lib/ferret/search/boolean_scorer.rb +11 -11
  24. data/lib/ferret/search/caching_wrapper_filter.rb +1 -1
  25. data/lib/ferret/search/disjunction_sum_scorer.rb +9 -7
  26. data/lib/ferret/search/field_cache.rb +1 -2
  27. data/lib/ferret/search/field_sorted_hit_queue.rb +1 -1
  28. data/lib/ferret/search/fuzzy_term_enum.rb +64 -58
  29. data/lib/ferret/search/index_searcher.rb +16 -9
  30. data/lib/ferret/search/prefix_query.rb +7 -0
  31. data/lib/ferret/search/query_filter.rb +1 -1
  32. data/lib/ferret/search/term_scorer.rb +5 -1
  33. data/lib/ferret/search/top_docs.rb +12 -0
  34. data/lib/ferret/store/buffered_index_io.rb +5 -6
  35. data/lib/ferret/store/fs_store.rb +47 -33
  36. data/lib/ferret/store/ram_store.rb +2 -2
  37. data/lib/ferret/utils.rb +1 -0
  38. data/lib/ferret/utils/bit_vector.rb +20 -2
  39. data/lib/ferret/utils/thread_local.rb +28 -0
  40. data/lib/ferret/utils/weak_key_hash.rb +11 -2
  41. data/test/benchmark/tb_rw_vint.rb +1 -1
  42. data/test/functional/thread_safety_index_test.rb +81 -0
  43. data/test/functional/thread_safety_test.rb +137 -0
  44. data/test/test_all.rb +3 -7
  45. data/test/test_helper.rb +2 -1
  46. data/test/unit/index/tc_compound_file_io.rb +2 -2
  47. data/test/unit/index/tc_index.rb +128 -6
  48. data/test/unit/index/tc_index_reader.rb +1 -1
  49. data/test/unit/index/tc_segment_infos.rb +1 -1
  50. data/test/unit/index/th_doc.rb +1 -1
  51. data/test/unit/search/tc_index_searcher.rb +6 -0
  52. data/test/unit/store/tc_fs_store.rb +3 -3
  53. data/test/unit/utils/tc_bit_vector.rb +8 -0
  54. data/test/unit/utils/tc_thread.rb +61 -0
  55. data/test/unit/utils/tc_weak_key_hash.rb +2 -2
  56. data/test/utils/number_to_spoken.rb +132 -0
  57. metadata +7 -2
data/lib/ferret/index/index_reader.rb

@@ -62,8 +62,11 @@ module Ferret::Index
  FieldOption.new("TERM_VECTOR_WITH_POSITION_OFFSET")
  end

- # directory:: Directory where IndexReader files reside.
- # segment_infos:: Used for write-l
+ # To create an IndexReader use the IndexReader.open method. This method
+ # should only be used by subclasses.
+ #
+ # directory:: Directory where IndexReader files reside.
+ # segment_infos:: Used for write-l
  # close_directory:: close the directory when the index reader is closed
  def initialize(directory, segment_infos = nil,
  close_directory = false, directory_owner = false)
@@ -81,7 +84,24 @@ module Ferret::Index
  end

  # Returns an index reader to read the index in the directory
+ #
+ # directory:: This can either be a Directory object or you can pass
+ # nil (RamDirectory is created) or a path (FSDirectory
+ # is created). If you choose the second or third options,
+ # you should leave close_directory as true and infos as
+ # nil.
+ # close_directory:: True if you want the IndexReader to close the
+ # directory when the IndexReader is closed. You'll want
+ # to set this to false if other objects are using the
+ # same directory object.
+ # infos:: Expert: This can be used to read a different version
+ # of the index but should really be left alone.
  def IndexReader.open(directory, close_directory = true, infos = nil)
+ if directory.nil?
+ directory = Ferret::Store::RAMDirectory.new
+ elsif directory.is_a?(String)
+ directory = Ferret::Store::FSDirectory.new(directory, true)
+ end
  directory.synchronize do # in- & inter-process sync
  commit_lock = directory.make_lock(IndexWriter::COMMIT_LOCK_NAME)
  commit_lock.while_locked() do
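The revised open method accepts the three argument shapes described in the comment above. A minimal usage sketch (the path is illustrative, and an index must already exist at the location for open to succeed):

    require 'ferret'

    # 1. A Directory object you manage yourself: pass close_directory = false
    #    so closing the reader leaves the directory open for other users.
    dir = Ferret::Store::FSDirectory.new("/tmp/my_index", false)
    reader = Ferret::Index::IndexReader.open(dir, false)
    reader.close()
    dir.close()

    # 2. A String path: an FSDirectory is created internally, so leave
    #    close_directory as true.
    reader = Ferret::Index::IndexReader.open("/tmp/my_index")
    reader.close()

    # 3. nil: a fresh RAMDirectory is created. An empty RAMDirectory has no
    #    segments yet, so this form is mainly useful after something (e.g. an
    #    IndexWriter sharing the directory) has populated it.
    # reader = Ferret::Index::IndexReader.open(nil)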
data/lib/ferret/index/index_writer.rb

@@ -54,6 +54,9 @@ module Index
  # NOTE:: all options are passed in a hash.
  #
  # dir:: the index directory
+ #
+ # == Options
+ #
  # analyzer:: the analyzer to use. Defaults to StandardAnalyzer.
  # create:: +true+ to create the index or overwrite the existing
  # one, +false+ to append to the existing index
@@ -62,17 +65,23 @@ module Index
  # close_dir:: This specifies whether you would like this class to close
  # the index directory when this class is closed. The
  # default is false.
- def initialize(dir, options = {})
+ # use_compound_file:: Use a compound file to store the index. This is
+ # slower than using multiple files but it prevents the
+ # too many files open error. This defaults to true.
+ def initialize(dir = nil, options = {})
  super()
- create = options[:create]||false
- create_if_missing = options[:create_if_missing]||false
+ create = options[:create] || false
+ create_if_missing = options[:create_if_missing] || false

- if dir.instance_of?(String)
- @directory = FSDirectory.get_directory(dir, create||create_if_missing)
+ if dir.nil?
+ @directory = Ferret::Store::RAMDirectory.new
+ elsif dir.is_a?(String)
+ @directory = Ferret::Store::FSDirectory.new(dir, create)
  else
  @directory = dir
  end
  @close_dir = options[:close_dir] || false
+ @use_compound_file = (options[:use_compound_file] != false) # ie default true
  @analyzer = options[:analyzer] || Ferret::Analysis::StandardAnalyzer.new
  @merge_factor = DEFAULT_MERGE_FACTOR
  @min_merge_docs = DEFAULT_MIN_MERGE_DOCS
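A short usage sketch of the revised constructor with the options documented above (path and values are illustrative). Note how the use_compound_file test works: only an explicit false disables it, since any other value, including omission, leaves the flag true:

    require 'ferret'

    # dir may now be omitted entirely; a RAMDirectory is created internally.
    writer = Ferret::Index::IndexWriter.new
    writer.close()

    # Or pass a path and tune the documented options.
    writer = Ferret::Index::IndexWriter.new("/tmp/my_index",
      :create            => true,  # overwrite any existing index
      :analyzer          => Ferret::Analysis::StandardAnalyzer.new,
      :close_dir         => true,  # close the FSDirectory with the writer
      :use_compound_file => true)  # the default; only an explicit false disables
    writer.close()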
@@ -108,6 +117,8 @@ module Index
  end
  end
  end
+
+ @info_stream = nil
  end

  # Flushes all changes to an index and closes all associated files.
@@ -125,11 +136,11 @@ module Index

  # Returns the number of documents currently in this index.
  def doc_count()
- count = 0
  synchronize() do
+ count = 0
  @segment_infos.each { |si| count += si.doc_count() }
+ return count
  end
- return count
  end

  # Adds a document to this index, using the provided analyzer instead of the
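doc_count now does all of its work, including the return, inside synchronize. Judging from the bare super() in IndexWriter#initialize above, the lock presumably comes from Ruby's MonitorMixin; a minimal standalone sketch of the same idiom (class and names are illustrative, not the gem's API):

    require 'monitor'

    class SegmentCounter
      include MonitorMixin

      def initialize
        super()            # initializes the monitor, as IndexWriter's super() does
        @doc_counts = []
      end

      def add_segment(n)
        synchronize { @doc_counts << n }
      end

      # Sum and return under a single lock, mirroring the revised doc_count.
      def doc_count
        synchronize do
          count = 0
          @doc_counts.each { |n| count += n }
          return count
        end
      end
    end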
@@ -223,7 +234,7 @@ module Index
  merger = SegmentMerger.new(@directory, merged_name, @term_index_interval)

  if (@segment_infos.size() == 1) # add existing index, if any
- s_reader = SegmentReader.new(@segment_infos[0])
+ s_reader = SegmentReader.get(@segment_infos[0])
  merger << s_reader
  segments_to_delete << s_reader
  end
@@ -232,7 +243,7 @@ module Index
  merger << reader
  end

- doc_count = merger.merge!() # merge 'em
+ doc_count = merger.merge() # merge 'em

  @segment_infos.clear() # pop old infos & add new
  @segment_infos << SegmentInfo.new(merged_name, doc_count, @directory)
@@ -241,9 +252,22 @@ module Index
  @directory.make_lock(COMMIT_LOCK_NAME).while_locked(COMMIT_LOCK_TIMEOUT) do
  @segment_infos.write(@directory) # commit changes
  delete_segments(segments_to_delete)
- return nil
  end
  end
+
+ if @use_compound_file
+ files_to_delete = merger.create_compound_file(merged_name + ".tmp")
+ @directory.synchronize() do # in- & inter-process sync
+ @directory.make_lock(COMMIT_LOCK_NAME).while_locked(COMMIT_LOCK_TIMEOUT) do
+ # make compound file visible for SegmentReaders
+ @directory.rename(merged_name + ".tmp", merged_name + ".cfs")
+ # delete now unused files of segment
+ delete_files_and_write_undeletable(files_to_delete)
+ end
+ end
+ end
+
+ optimize()
  end
  end

@@ -379,11 +403,10 @@ module Index
  merged_doc_count = merger.merge()

  if (@info_stream != nil)
- @info_stream.print(" into " + merged_name + " (" + merged_doc_count.to_s + " docs)\n")
+ @info_stream.print(" into #{merged_name} (#{merged_doc_count.to_s} docs)\n")
  end

  (max_segment-1).downto(min_segment) {|i| @segment_infos.delete_at(i) }
- #@segment_infos = @segment_infos[0,min_segment] + @segment_infos[max_segment...-1]

  @segment_infos << SegmentInfo.new(merged_name, merged_doc_count, @directory)

@@ -394,10 +417,21 @@ module Index
  @directory.make_lock(COMMIT_LOCK_NAME).while_locked(COMMIT_LOCK_TIMEOUT) do
  @segment_infos.write(@directory) # commit before deleting
  delete_segments(segments_to_delete) # delete now-unused segments
- return nil
  end
  end
- segments_to_delete.size.times {|i| segments_to_delete[i] = nil }
+
+ if @use_compound_file
+ files_to_delete = merger.create_compound_file(merged_name + ".tmp")
+ @directory.synchronize() do # in- & inter-process sync
+ @directory.make_lock(COMMIT_LOCK_NAME).while_locked(COMMIT_LOCK_TIMEOUT) do
+ # make compound file visible for SegmentReaders
+ @directory.rename(merged_name + ".tmp", merged_name + ".cfs")
+ # delete now unused files of segment
+ delete_files_and_write_undeletable(files_to_delete)
+ end
+ end
+ end
+
  end

  # Some operating systems (e.g. Windows) don't permit a file to be
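The two hunks above add identical commit logic: the compound file's data is written under a temporary .tmp name outside the critical window, and only the cheap rename happens while the commit lock is held, so a concurrent SegmentReader never observes a partially written .cfs file. The same write-then-rename pattern in plain Ruby (file names illustrative):

    require 'tmpdir'

    Dir.mktmpdir do |dir|
      tmp   = File.join(dir, "_seg1.tmp")
      final = File.join(dir, "_seg1.cfs")

      # The slow, interruptible write happens under the throwaway name.
      File.open(tmp, "wb") { |f| f << "compound file contents" }

      # The publish step is a single rename, so readers see either the old
      # state or the complete new file, never a half-written one.
      File.rename(tmp, final)
    end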
@@ -440,6 +474,13 @@

  end

+ def delete_files_and_write_undeletable(files)
+ deletable = []
+ try_to_delete_files(read_deleteable_files(), deletable) # try to delete deleteable
+ try_to_delete_files(files, deletable) # try to delete our files
+ write_deleteable_files(deletable) # note files we can't delete
+ end
+
  def delete_files(file_names, dir)
  file_names.each do |file_name|
  dir.delete(file_name)
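The new delete_files_and_write_undeletable helper builds on the Windows note above: every deletion is attempted, and whatever the OS refuses is recorded so a later commit can retry it. A generic sketch of that try-and-record pattern (names are illustrative, not the gem's API):

    # Attempt each deletion; collect names the OS refuses (e.g. files still
    # held open on Windows) so a later pass can retry them.
    def try_to_delete(file_names, still_deletable)
      file_names.each do |name|
        begin
          File.delete(name)
        rescue SystemCallError
          still_deletable << name   # locked or busy; remember it for later
        end
      end
    end

    leftovers = []
    try_to_delete(["_old_seg.cfs", "_old_seg.tmp"], leftovers)
    # Persist the leftovers (the gem writes a "deletable" file) and retry
    # them on the next commit.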
data/lib/ferret/index/multi_reader.rb

@@ -1,133 +1,133 @@
- module Ferret
- module Index
- # An IndexReader which reads multiple indexes, appending their content.
- class MultiReader < IndexReader
- attr_reader :max_doc
-
- # Construct a MultiReader aggregating the named set of (sub)readers.
- # Directory locking for delete, undeleteAll, and set_norm operations is
- # left to the subreaders.
- #
- # Note that all subreaders are closed if this Multireader is closed.
- # sub_readers:: set of (sub)readers
- # raises:: IOException
- def initialize(sub_readers, directory = nil, sis = nil, close_dir = false)
- if (directory)
- super(directory, sis, close_dir)
- else
- super(sub_readers.length == 0 ? nil : sub_readers[0].directory())
+ module Ferret::Index
+ # An IndexReader which reads multiple indexes, appending their content.
+ class MultiReader < IndexReader
+ attr_reader :max_doc
+
+ # Construct a MultiReader aggregating the named set of (sub)readers.
+ # Directory locking for delete, undeleteAll, and set_norm operations is
+ # left to the subreaders.
+ #
+ # Note that all subreaders are closed if this Multireader is closed.
+ # sub_readers:: set of (sub)readers
+ # raises:: IOException
+ def initialize(sub_readers, directory = nil, sis = nil, close_dir = false)
+ if (directory)
+ super(directory, sis, close_dir)
+ else
+ super(sub_readers.length == 0 ? nil : sub_readers[0].directory())
+ end
+
+ @max_doc = 0
+ @num_docs = -1
+ @has_deletions = false
+
+ @sub_readers = sub_readers
+ @starts = Array.new(@sub_readers.length + 1) # build starts array
+ @sub_readers.each_with_index do |sub_reader, i|
+ @starts[i] = @max_doc
+ @max_doc += sub_reader.max_doc # compute max_docs
+
+ if @sub_readers[i].has_deletions?
+ @has_deletions = true
  end
-
- @max_doc = 0
- @num_docs = -1
- @has_deletions = false
-
- @sub_readers = sub_readers
- @starts = Array.new(@sub_readers.length + 1) # build starts array
- @sub_readers.each_with_index do |sub_reader, i|
- @starts[i] = @max_doc
- @max_doc += sub_reader.max_doc # compute max_docs
-
- if @sub_readers[i].has_deletions?
- @has_deletions = true
- end
- end
- @starts[@sub_readers.length] = @max_doc
- @norms_cache = {}
  end
+ @starts[@sub_readers.length] = @max_doc
+ @norms_cache = {}
+ end


- # Return an array of term frequency vectors for the specified document. The
- # array contains a vector for each vectorized field in the document. Each
- # vector contains term numbers and frequencies for all terms in a
- # given vectorized field. If no such fields existed, the method returns
- # nil.
- def get_term_vectors(n)
- i = reader_index(n) # find segment num
- return @sub_readers[i].get_term_vectors(n - @starts[i]); # dispatch to segment
- end
+ # Return an array of term frequency vectors for the specified document. The
+ # array contains a vector for each vectorized field in the document. Each
+ # vector contains term numbers and frequencies for all terms in a
+ # given vectorized field. If no such fields existed, the method returns
+ # nil.
+ def get_term_vectors(n)
+ i = reader_index(n) # find segment num
+ return @sub_readers[i].get_term_vectors(n - @starts[i]); # dispatch to segment
+ end

- def get_term_vector(n, field)
- i = reader_index(n) # find segment num
- return @sub_readers[i].get_term_vector(n - @starts[i], field)
- end
+ def get_term_vector(n, field)
+ i = reader_index(n) # find segment num
+ return @sub_readers[i].get_term_vector(n - @starts[i], field)
+ end

- def num_docs()
- synchronize do
- if (@num_docs == -1) # check cache
- n = 0 # cache miss - recompute
- @sub_readers.each {|reader| n += reader.num_docs()}
- @num_docs = n
- end
- return @num_docs
+ def num_docs()
+ synchronize do
+ if (@num_docs == -1) # check cache
+ n = 0 # cache miss - recompute
+ @sub_readers.each {|reader| n += reader.num_docs()}
+ @num_docs = n
  end
+ return @num_docs
  end
+ end

- def get_document(n)
- i = reader_index(n) # find segment num
- return @sub_readers[i].get_document(n - @starts[i]) # dispatch to segment reader
- end
+ def get_document(n)
+ i = reader_index(n) # find segment num
+ return @sub_readers[i].get_document(n - @starts[i]) # dispatch to segment reader
+ end

- def deleted?(n)
- i = reader_index(n) # find segment num
- return @sub_readers[i].deleted?(n - @starts[i]) # dispatch to segment reader
- end
+ def deleted?(n)
+ i = reader_index(n) # find segment num
+ return @sub_readers[i].deleted?(n - @starts[i]) # dispatch to segment reader
+ end

- def has_deletions?()
- return @has_deletions
- end
+ def has_deletions?()
+ return @has_deletions
+ end

- def do_delete(n)
- @num_docs = -1 # invalidate cache
- i = reader_index(n) # find segment num
- @sub_readers[i].delete(n - @starts[i]) # dispatch to segment reader
- @has_deletions = true
- end
+ def do_delete(n)
+ @num_docs = -1 # invalidate cache
+ i = reader_index(n) # find segment num
+ @sub_readers[i].delete(n - @starts[i]) # dispatch to segment reader
+ @has_deletions = true
+ end

- def do_undelete_all()
- @num_docs = -1 # invalidate cache
- @sub_readers.each {|reader| reader.undelete_all() }
- @has_deletions = false
- end
+ def do_undelete_all()
+ @num_docs = -1 # invalidate cache
+ @sub_readers.each {|reader| reader.undelete_all() }
+ @has_deletions = false
+ end

- def reader_index(n) # find reader for doc n:
- lo = 0 # search @starts array
- hi = @sub_readers.length - 1 # for first element less
-
- while (hi >= lo)
- mid = (lo + hi) >> 1
- mid_value = @starts[mid]
- if (n < mid_value)
- hi = mid - 1
- elsif (n > mid_value)
- lo = mid + 1
- else # found a match
- while (mid+1 < @sub_readers.length and @starts[mid+1] == mid_value)
- mid += 1 # scan to last match
- end
- return mid
+ def reader_index(n) # find reader for doc n:
+ lo = 0 # search @starts array
+ hi = @sub_readers.length - 1 # for first element less
+
+ while (hi >= lo)
+ mid = (lo + hi) >> 1
+ mid_value = @starts[mid]
+ if (n < mid_value)
+ hi = mid - 1
+ elsif (n > mid_value)
+ lo = mid + 1
+ else # found a match
+ while (mid+1 < @sub_readers.length and @starts[mid+1] == mid_value)
+ mid += 1 # scan to last match
  end
+ return mid
  end
- return hi
  end
+ return hi
+ end

- def get_norms(field)
- synchronize do
- bytes = @norms_cache[field]
- if (bytes != nil)
- return bytes # cache hit
- end
+ def get_norms(field)
+ synchronize do
+ bytes = @norms_cache[field]
+ if (bytes != nil)
+ return bytes # cache hit
+ end

- bytes = " " * @max_doc
- @sub_readers.length.times do |i|
- @sub_readers[i].get_norms_into(field, bytes, @starts[i])
- end
- @norms_cache[field] = bytes # update cache
- return bytes
+ bytes = " " * @max_doc
+ @sub_readers.length.times do |i|
+ @sub_readers[i].get_norms_into(field, bytes, @starts[i])
  end
+ @norms_cache[field] = bytes # update cache
+ return bytes
  end
+ end

- def get_norms_into(field, buf, offset)
+ def get_norms_into(field, buf, offset)
+ synchronize do
  bytes = @norms_cache[field]
  if (bytes != nil) # cache hit
  buf[offset ,@max_doc] = bytes[0, @max_doc]
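The reader_index method in the hunk above maps a merged document number to the sub-reader that owns it by binary-searching @starts for the last entry less than or equal to n. A worked example (sizes illustrative): with sub-readers of 10, 15 and 5 documents, @starts is [0, 10, 25, 30], so merged doc 13 belongs to reader 1 as local doc 13 - 10 = 3:

    starts = [0, 10, 25, 30]   # three sub-readers holding 10, 15 and 5 docs

    # Linear version of the lookup the binary search performs: the owning
    # reader is the last index whose start is <= n.
    def reader_for(starts, n)
      (starts.length - 2).downto(0) { |i| return i if starts[i] <= n }
    end

    i = reader_for(starts, 13)  # => 1
    local = 13 - starts[i]      # => 3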
@@ -138,226 +138,226 @@ module Ferret
  @sub_readers[i].get_norms_into(field, buf, offset + @starts[i])
  end
  end
+ end

- def do_set_norm(n, field, value)
- @norms_cache.delete(field) # clear cache
- i = reader_index(n) # find segment num
- @sub_readers[i].set_norm(n-@starts[i], field, value); # dispatch
- end
+ def do_set_norm(n, field, value)
+ @norms_cache.delete(field) # clear cache
+ i = reader_index(n) # find segment num
+ @sub_readers[i].set_norm(n-@starts[i], field, value); # dispatch
+ end

- def terms()
- return MultiTermEnum.new(@sub_readers, @starts, nil)
- end
+ def terms()
+ return MultiTermEnum.new(@sub_readers, @starts, nil)
+ end

- def terms_from(term)
- return MultiTermEnum.new(@sub_readers, @starts, term)
- end
+ def terms_from(term)
+ return MultiTermEnum.new(@sub_readers, @starts, term)
+ end

- def doc_freq(t)
- total = 0 # sum freqs in segments
- @sub_readers.each {|reader| total += reader.doc_freq(t)}
- return total
- end
+ def doc_freq(t)
+ total = 0 # sum freqs in segments
+ @sub_readers.each {|reader| total += reader.doc_freq(t)}
+ return total
+ end

- def term_docs()
- return MultiTermDocEnum.new(@sub_readers, @starts)
- end
+ def term_docs()
+ return MultiTermDocEnum.new(@sub_readers, @starts)
+ end

- def term_positions()
- return MultiTermDocPosEnum.new(@sub_readers, @starts)
- end
+ def term_positions()
+ return MultiTermDocPosEnum.new(@sub_readers, @starts)
+ end

- def do_commit()
- @sub_readers.each {|reader| reader.commit() }
- end
+ def do_commit()
+ @sub_readers.each {|reader| reader.commit() }
+ end

- def do_close()
- synchronize do
- @sub_readers.each {|reader| reader.close() }
- end
+ def do_close()
+ synchronize do
+ @sub_readers.each {|reader| reader.close() }
  end
+ end

- # See IndexReader#get_field_names
- def get_field_names(field_option = IndexReader::FieldOption::ALL)
- # maintain a unique set of field names
- field_set = Set.new
- @sub_readers.each do |reader|
- field_set |= reader.get_field_names(field_option)
- end
- return field_set
+ # See IndexReader#get_field_names
+ def get_field_names(field_option = IndexReader::FieldOption::ALL)
+ # maintain a unique set of field names
+ field_set = Set.new
+ @sub_readers.each do |reader|
+ field_set |= reader.get_field_names(field_option)
  end
+ return field_set
  end
+ end

- class MultiTermEnum < TermEnum
+ class MultiTermEnum < TermEnum

- attr_reader :doc_freq, :term
+ attr_reader :doc_freq, :term

- def initialize(readers, starts, t)
- @queue = SegmentMergeQueue.new(readers.length)
- readers.each_index do |i|
- reader = readers[i]
- term_enum = nil
- if (t != nil)
- term_enum = reader.terms_from(t)
- else
- term_enum = reader.terms()
- end
- smi = SegmentMergeInfo.new(starts[i], term_enum, reader)
-
- if (t == nil and smi.next?) or term_enum.term
- @queue.push(smi); # initialize queue
- else
- smi.close()
- end
+ def initialize(readers, starts, t)
+ @queue = SegmentMergeQueue.new(readers.length)
+ readers.each_index do |i|
+ reader = readers[i]
+ term_enum = nil
+ if (t != nil)
+ term_enum = reader.terms_from(t)
+ else
+ term_enum = reader.terms()
  end
+ smi = SegmentMergeInfo.new(starts[i], term_enum, reader)

- if (t != nil and @queue.size() > 0)
- next?()
+ if (t == nil and smi.next?) or term_enum.term
+ @queue.push(smi); # initialize queue
+ else
+ smi.close()
  end
  end

- def next?()
- top = @queue.top()
- if (top == nil)
- @term = nil
- return false
- end
+ if (t != nil and @queue.size() > 0)
+ next?()
+ end
+ end
+
+ def next?()
+ top = @queue.top()
+ if (top == nil)
+ @term = nil
+ return false
+ end

- @term = top.term
- @doc_freq = 0
+ @term = top.term
+ @doc_freq = 0

- while top and @term == top.term
- @queue.pop()
- @doc_freq += top.term_enum.doc_freq() # increment freq
- if (top.next?)
- @queue.push(top) # restore queue
- else
- top.close() # done with a segment
- end
- top = @queue.top()
+ while top and @term == top.term
+ @queue.pop()
+ @doc_freq += top.term_enum.doc_freq() # increment freq
+ if (top.next?)
+ @queue.push(top) # restore queue
+ else
+ top.close() # done with a segment
  end
- return true
+ top = @queue.top()
  end
+ return true
+ end

- def close()
- @queue.close()
- end
+ def close()
+ @queue.close()
  end
+ end

- class MultiTermDocEnum < TermDocEnum
- attr_accessor :readers, :starts, :term, :base, :pointer, :current
+ class MultiTermDocEnum < TermDocEnum
+ attr_accessor :readers, :starts, :term, :base, :pointer, :current

- def initialize(readers, starts)
- @readers = readers
- @starts = starts
- @base = 0
- @pointer = 0
+ def initialize(readers, starts)
+ @readers = readers
+ @starts = starts
+ @base = 0
+ @pointer = 0

- @reader_term_docs = Array.new(readers.length)
- end
+ @reader_term_docs = Array.new(readers.length)
+ end

- def doc
- return @base + @current.doc()
- end
+ def doc
+ return @base + @current.doc()
+ end

- def freq
- return @current.freq()
- end
+ def freq
+ return @current.freq()
+ end

- def seek(term)
- @term = term
- @base = 0
- @pointer = 0
- @current = nil
- end
+ def seek(term)
+ @term = term
+ @base = 0
+ @pointer = 0
+ @current = nil
+ end

- def next?
- if @current and @current.next?
- return true
- elsif @pointer < @readers.length
- @base = @starts[@pointer]
- @current = term_docs(@pointer)
- @pointer += 1
- return next?()
- else
- return false
- end
+ def next?
+ if @current and @current.next?
+ return true
+ elsif @pointer < @readers.length
+ @base = @starts[@pointer]
+ @current = term_docs(@pointer)
+ @pointer += 1
+ return next?()
+ else
+ return false
  end
+ end

- # Optimized implementation. Unlike the Java version, this method
- # always returns as many results as it can read.
- def read(docs, freqs)
- got = 0
- last_got = 0
- needed = docs.length
-
- while (true)
- while @current.nil?
- if @pointer < @readers.length # begin next segment
- @base = @starts[@pointer]
- @current = term_docs(@pointer)
- @pointer += 1
- else
- return got
- end
+ # Optimized implementation. Unlike the Java version, this method
+ # always returns as many results as it can read.
+ def read(docs, freqs)
+ got = 0
+ last_got = 0
+ needed = docs.length
+
+ while (true)
+ while @current.nil?
+ if @pointer < @readers.length # try next segment
+ @base = @starts[@pointer]
+ @current = term_docs(@pointer)
+ @pointer += 1
+ else
+ return got
  end
- got = @current.read(docs, freqs, got)
- if (got == last_got) # none left in segment
- @current = nil
- else # got some
- b = @base # adjust doc numbers
- (last_got...got).each {|i| docs[i] += b}
- if got == needed
- return got
- else
- last_got = got
- end
+ end
+ got = @current.read(docs, freqs, got)
+ if (got == last_got) # none left in segment
+ @current = nil
+ else # got some
+ b = @base # adjust doc numbers
+ (last_got...got).each {|i| docs[i] += b}
+ if got == needed
+ return got
+ else
+ last_got = got
  end
  end
  end
+ end

- # As yet unoptimized implementation.
- def skip_to(target)
- begin
- return false if not next?
- end while target > doc()
- return true
- end
-
- def term_docs(i)
- return nil if (@term == nil)
- result = @reader_term_docs[i]
- if (result == nil)
- result = @reader_term_docs[i] = term_docs_from_reader(@readers[i])
- end
- result.seek(@term)
- return result
- end
+ # As yet unoptimized implementation.
+ def skip_to(target)
+ begin
+ return false if not next?
+ end while target > doc()
+ return true
+ end

- def term_docs_from_reader(reader)
- return reader.term_docs()
+ def term_docs(i)
+ return nil if (@term == nil)
+ result = @reader_term_docs[i]
+ if (result == nil)
+ result = @reader_term_docs[i] = term_docs_from_reader(@readers[i])
  end
+ result.seek(@term)
+ return result
+ end

- def close()
- @reader_term_docs.compact.each do |rtd|
- rtd.close()
- end
- end
+ def term_docs_from_reader(reader)
+ return reader.term_docs()
  end

- class MultiTermDocPosEnum < MultiTermDocEnum
- def initialize(r, s)
- super(r,s)
+ def close()
+ @reader_term_docs.compact.each do |rtd|
+ rtd.close()
  end
+ end
+ end

- def term_docs_from_reader(reader)
- return reader.term_positions()
- end
+ class MultiTermDocPosEnum < MultiTermDocEnum
+ def initialize(r, s)
+ super(r,s)
+ end

- def next_position()
- return @current.next_position()
- end
+ def term_docs_from_reader(reader)
+ return reader.term_positions()
+ end

+ def next_position()
+ return @current.next_position()
  end
+
  end
  end