ferret 0.3.2 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +9 -0
- data/Rakefile +51 -25
- data/ext/analysis.c +553 -0
- data/ext/analysis.h +76 -0
- data/ext/array.c +83 -0
- data/ext/array.h +19 -0
- data/ext/bitvector.c +164 -0
- data/ext/bitvector.h +29 -0
- data/ext/compound_io.c +335 -0
- data/ext/document.c +336 -0
- data/ext/document.h +87 -0
- data/ext/ferret.c +88 -47
- data/ext/ferret.h +43 -109
- data/ext/field.c +395 -0
- data/ext/filter.c +103 -0
- data/ext/fs_store.c +352 -0
- data/ext/global.c +219 -0
- data/ext/global.h +73 -0
- data/ext/hash.c +446 -0
- data/ext/hash.h +80 -0
- data/ext/hashset.c +141 -0
- data/ext/hashset.h +37 -0
- data/ext/helper.c +11 -0
- data/ext/helper.h +5 -0
- data/ext/inc/lang.h +41 -0
- data/ext/ind.c +389 -0
- data/ext/index.h +884 -0
- data/ext/index_io.c +269 -415
- data/ext/index_rw.c +2543 -0
- data/ext/lang.c +31 -0
- data/ext/lang.h +41 -0
- data/ext/priorityqueue.c +228 -0
- data/ext/priorityqueue.h +44 -0
- data/ext/q_boolean.c +1331 -0
- data/ext/q_const_score.c +154 -0
- data/ext/q_fuzzy.c +287 -0
- data/ext/q_match_all.c +142 -0
- data/ext/q_multi_phrase.c +343 -0
- data/ext/q_parser.c +2180 -0
- data/ext/q_phrase.c +657 -0
- data/ext/q_prefix.c +75 -0
- data/ext/q_range.c +247 -0
- data/ext/q_span.c +1566 -0
- data/ext/q_term.c +308 -0
- data/ext/q_wildcard.c +146 -0
- data/ext/r_analysis.c +255 -0
- data/ext/r_doc.c +578 -0
- data/ext/r_index_io.c +996 -0
- data/ext/r_qparser.c +158 -0
- data/ext/r_search.c +2321 -0
- data/ext/r_store.c +263 -0
- data/ext/r_term.c +219 -0
- data/ext/ram_store.c +447 -0
- data/ext/search.c +524 -0
- data/ext/search.h +1065 -0
- data/ext/similarity.c +143 -39
- data/ext/sort.c +661 -0
- data/ext/store.c +35 -0
- data/ext/store.h +152 -0
- data/ext/term.c +704 -143
- data/ext/termdocs.c +599 -0
- data/ext/vector.c +594 -0
- data/lib/ferret.rb +9 -10
- data/lib/ferret/analysis/analyzers.rb +2 -2
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +14 -14
- data/lib/ferret/analysis/token_filters.rb +3 -3
- data/lib/ferret/document/field.rb +16 -17
- data/lib/ferret/index/document_writer.rb +4 -4
- data/lib/ferret/index/index.rb +39 -23
- data/lib/ferret/index/index_writer.rb +2 -2
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
- data/lib/ferret/index/segment_term_vector.rb +4 -4
- data/lib/ferret/index/term.rb +5 -1
- data/lib/ferret/index/term_vector_offset_info.rb +6 -6
- data/lib/ferret/index/term_vectors_io.rb +5 -5
- data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
- data/lib/ferret/search.rb +1 -1
- data/lib/ferret/search/boolean_query.rb +2 -1
- data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
- data/lib/ferret/search/fuzzy_query.rb +2 -1
- data/lib/ferret/search/index_searcher.rb +3 -0
- data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
- data/lib/ferret/search/multi_phrase_query.rb +6 -5
- data/lib/ferret/search/phrase_query.rb +3 -6
- data/lib/ferret/search/prefix_query.rb +4 -4
- data/lib/ferret/search/sort.rb +3 -1
- data/lib/ferret/search/sort_field.rb +9 -9
- data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
- data/lib/ferret/search/spans/span_near_query.rb +1 -1
- data/lib/ferret/search/spans/span_weight.rb +1 -1
- data/lib/ferret/search/spans/spans_enum.rb +7 -7
- data/lib/ferret/store/fs_store.rb +10 -6
- data/lib/ferret/store/ram_store.rb +3 -3
- data/lib/rferret.rb +36 -0
- data/test/functional/thread_safety_index_test.rb +2 -2
- data/test/test_helper.rb +16 -2
- data/test/unit/analysis/c_token.rb +25 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
- data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
- data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
- data/test/unit/document/c_field.rb +98 -0
- data/test/unit/document/tc_field.rb +0 -66
- data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
- data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
- data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
- data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
- data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
- data/test/unit/index/tc_segment_term_vector.rb +2 -2
- data/test/unit/index/tc_term_vectors_io.rb +4 -4
- data/test/unit/query_parser/c_query_parser.rb +138 -0
- data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
- data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
- data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
- data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
- data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
- data/test/unit/search/c_sort_field.rb +27 -0
- data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +7 -20
- data/test/unit/store/c_fs_store.rb +76 -0
- data/test/unit/store/c_ram_store.rb +35 -0
- data/test/unit/store/m_store.rb +34 -0
- data/test/unit/store/m_store_lock.rb +68 -0
- data/test/unit/store/tc_fs_store.rb +0 -53
- data/test/unit/store/tc_ram_store.rb +0 -20
- data/test/unit/store/tm_store.rb +0 -30
- data/test/unit/store/tm_store_lock.rb +0 -66
- metadata +84 -31
- data/ext/Makefile +0 -140
- data/ext/ferret_ext.so +0 -0
- data/ext/priority_queue.c +0 -232
- data/ext/ram_directory.c +0 -321
- data/ext/segment_merge_queue.c +0 -37
- data/ext/segment_term_enum.c +0 -326
- data/ext/string_helper.c +0 -42
- data/ext/tags +0 -344
- data/ext/term_buffer.c +0 -230
- data/ext/term_infos_reader.c +0 -54
- data/ext/terminfo.c +0 -160
- data/ext/token.c +0 -93
- data/ext/util.c +0 -12
data/lib/ferret.rb
CHANGED
@@ -22,20 +22,19 @@
|
|
22
22
|
#++
|
23
23
|
# :include: ../TUTORIAL
|
24
24
|
module Ferret
|
25
|
-
VERSION = '0.
|
25
|
+
VERSION = '0.9.0'
|
26
26
|
end
|
27
27
|
|
28
|
-
require 'ferret/utils'
|
29
|
-
require 'ferret/document'
|
30
|
-
require 'ferret/stemmers'
|
31
|
-
require 'ferret/analysis'
|
32
|
-
require 'ferret/store'
|
33
|
-
require 'ferret/index'
|
34
|
-
require 'ferret/search'
|
35
|
-
require 'ferret/query_parser'
|
36
|
-
|
37
28
|
# try and load the C extension but it isn't necessary.
|
38
29
|
begin
|
39
30
|
require 'ferret_ext'
|
40
31
|
rescue Exception => e
|
32
|
+
require 'ferret/utils'
|
33
|
+
require 'ferret/document'
|
34
|
+
require 'ferret/stemmers'
|
35
|
+
require 'ferret/analysis'
|
36
|
+
require 'ferret/store'
|
37
|
+
require 'ferret/index'
|
38
|
+
require 'ferret/search'
|
39
|
+
require 'ferret/query_parser'
|
41
40
|
end
|
@@ -28,10 +28,10 @@ module Ferret::Analysis
|
|
28
28
|
# exact PhraseQuery matches, for instance, across Field instance boundaries.
|
29
29
|
#
|
30
30
|
# field_name:: Field name being indexed.
|
31
|
-
#
|
31
|
+
# pos_inc_gap:: added to the next token emitted from
|
32
32
|
# #token_stream(String,Reader)
|
33
33
|
#
|
34
|
-
def
|
34
|
+
def pos_inc_gap(field_name)
|
35
35
|
return 0
|
36
36
|
end
|
37
37
|
|
@@ -64,7 +64,7 @@ if __FILE__ == $0
|
|
64
64
|
$stdin.each do |line|
|
65
65
|
stk = Ferret::Analysis::StandardTokenizer.new(line)
|
66
66
|
while tk = stk.next()
|
67
|
-
puts " <" + tk.
|
67
|
+
puts " <" + tk.text + "> from " + tk.start_offset.to_s + " to " + tk.end_offset.to_s
|
68
68
|
end
|
69
69
|
end
|
70
70
|
end
|
@@ -18,25 +18,25 @@ module Ferret::Analysis
|
|
18
18
|
# end_offset:: is equal to one greater than the position of the last
|
19
19
|
# character corresponding of this token Note that the
|
20
20
|
# difference between @end_offset and @start_offset may not be
|
21
|
-
# equal to @
|
21
|
+
# equal to @text.length(), as the term text may have been
|
22
22
|
# altered by a stemmer or some other filter.
|
23
23
|
class Token
|
24
24
|
include Comparable
|
25
|
-
attr_accessor :
|
26
|
-
attr_reader :
|
25
|
+
attr_accessor :text
|
26
|
+
attr_reader :pos_inc, :start_offset, :end_offset, :type
|
27
27
|
|
28
28
|
# Constructs a Token with the given term text, and start & end offsets.
|
29
29
|
# The type defaults to "word."
|
30
|
-
def initialize(txt, so, eo, typ="word"
|
31
|
-
@
|
30
|
+
def initialize(txt, so, eo, pos_inc=1, typ="word")
|
31
|
+
@text = txt
|
32
32
|
@start_offset = so
|
33
33
|
@end_offset = eo
|
34
34
|
@type = typ # lexical type
|
35
|
-
@
|
35
|
+
@pos_inc = pos_inc
|
36
36
|
end
|
37
37
|
|
38
38
|
def set!(txt, so, eo)
|
39
|
-
@
|
39
|
+
@text = txt
|
40
40
|
@start_offset = so
|
41
41
|
@end_offset = eo
|
42
42
|
self
|
@@ -44,20 +44,20 @@ module Ferret::Analysis
|
|
44
44
|
|
45
45
|
def eql?(o)
|
46
46
|
return (o.instance_of?(Token) and @start_offset == o.start_offset and
|
47
|
-
@end_offset == o.end_offset and @
|
47
|
+
@end_offset == o.end_offset and @text == o.text)
|
48
48
|
end
|
49
49
|
alias :== :eql?
|
50
50
|
|
51
51
|
# Tokens are sorted by the position in the text at which they occur, ie
|
52
52
|
# the start_offset. If two tokens have the same start offset, (see
|
53
|
-
#
|
53
|
+
# pos_inc=) then, they are sorted by the end_offset and then
|
54
54
|
# lexically by the token text.
|
55
55
|
def <=>(o)
|
56
56
|
r = @start_offset <=> o.start_offset
|
57
57
|
return r if r != 0
|
58
58
|
r = @end_offset <=> o.end_offset
|
59
59
|
return r if r != 0
|
60
|
-
r = @
|
60
|
+
r = @text <=> o.text
|
61
61
|
return r
|
62
62
|
end
|
63
63
|
|
@@ -82,17 +82,17 @@ module Ferret::Analysis
|
|
82
82
|
# words and also sets the increment to the number of stop words removed
|
83
83
|
# before each non-stop word. Then exact phrase queries will only match
|
84
84
|
# when the terms occur with no intervening stop words.
|
85
|
-
def
|
85
|
+
def pos_inc=(pos_inc)
|
86
86
|
if (pos_inc < 0)
|
87
87
|
raise ArgumentError, "Increment must be zero or greater: " + pos_inc
|
88
88
|
end
|
89
|
-
@
|
89
|
+
@pos_inc = pos_inc
|
90
90
|
end
|
91
91
|
|
92
92
|
# Returns a string representation of the token with all the attributes.
|
93
93
|
def to_s
|
94
|
-
buf = "#{
|
95
|
-
buf << "(pos_inc=#{@
|
94
|
+
buf = "#{text}:#{start_offset}->#{end_offset}"
|
95
|
+
buf << "(pos_inc=#{@pos_inc})" if (@pos_inc != 1)
|
96
96
|
buf << "(type=#{@type})" if (@type != "word")
|
97
97
|
buf
|
98
98
|
end
|
@@ -24,7 +24,7 @@ module Ferret::Analysis
|
|
24
24
|
return nil
|
25
25
|
end
|
26
26
|
|
27
|
-
t.
|
27
|
+
t.text = t.text.downcase()
|
28
28
|
|
29
29
|
return t
|
30
30
|
end
|
@@ -50,7 +50,7 @@ module Ferret::Analysis
|
|
50
50
|
def next()
|
51
51
|
# return the first non-stop word found
|
52
52
|
while token = @input.next()
|
53
|
-
return token if ! @stop_set.include?(token.
|
53
|
+
return token if ! @stop_set.include?(token.text)
|
54
54
|
end
|
55
55
|
return nil
|
56
56
|
end
|
@@ -78,7 +78,7 @@ module Ferret::Analysis
|
|
78
78
|
if (token == nil)
|
79
79
|
return nil
|
80
80
|
else
|
81
|
-
token.
|
81
|
+
token.text = Stemmable.stem_porter(token.text)
|
82
82
|
end
|
83
83
|
token
|
84
84
|
end
|
@@ -27,8 +27,7 @@ module Ferret::Document
|
|
27
27
|
# Documents returned from IndexReader#document(int) and
|
28
28
|
# Hits#doc(int) may thus not have the same value present as when this field
|
29
29
|
# was indexed.
|
30
|
-
attr_accessor :boost, :data
|
31
|
-
|
30
|
+
attr_accessor :boost, :data
|
32
31
|
attr_reader :name
|
33
32
|
|
34
33
|
# True iff the value of the field is to be stored in the index for
|
@@ -160,34 +159,34 @@ module Ferret::Document
|
|
160
159
|
# this field more important.
|
161
160
|
def initialize(name,
|
162
161
|
value,
|
163
|
-
|
162
|
+
store = Store::YES,
|
164
163
|
index = Index::UNTOKENIZED,
|
165
|
-
|
164
|
+
term_vector = TermVector::NO,
|
166
165
|
binary = false,
|
167
166
|
boost = 1.0)
|
168
|
-
if (index == Index::NO and
|
167
|
+
if (index == Index::NO and store == Store::NO)
|
169
168
|
raise ArgumentError, "it doesn't make sense to have a field that " +
|
170
169
|
"is neither indexed nor stored"
|
171
170
|
end
|
172
|
-
if (index == Index::NO &&
|
171
|
+
if (index == Index::NO && term_vector != TermVector::NO)
|
173
172
|
raise ArgumentError, "cannot store term vector information for a " +
|
174
173
|
"field that is not indexed"
|
175
174
|
end
|
176
175
|
|
177
176
|
# The name of the field (e.g., "date", "subject", "title", or "body")
|
178
|
-
@name = name
|
177
|
+
@name = name.to_s
|
179
178
|
|
180
179
|
# the one and only data object for all different kind of field values
|
181
180
|
@data = value
|
182
|
-
self.
|
181
|
+
self.store = store
|
183
182
|
self.index = index
|
184
|
-
self.
|
183
|
+
self.term_vector = term_vector
|
185
184
|
@binary = binary
|
186
185
|
@boost = boost
|
187
186
|
end
|
188
187
|
|
189
|
-
def
|
190
|
-
case
|
188
|
+
def store=(store)
|
189
|
+
case store
|
191
190
|
when Store::YES
|
192
191
|
@stored = true
|
193
192
|
@compressed = false
|
@@ -198,7 +197,7 @@ module Ferret::Document
|
|
198
197
|
@stored = false
|
199
198
|
@compressed = false
|
200
199
|
else
|
201
|
-
raise "unknown stored parameter " +
|
200
|
+
raise "unknown stored parameter " + store.to_s
|
202
201
|
end
|
203
202
|
end
|
204
203
|
|
@@ -223,8 +222,8 @@ module Ferret::Document
|
|
223
222
|
end
|
224
223
|
end
|
225
224
|
|
226
|
-
def
|
227
|
-
case
|
225
|
+
def term_vector=(term_vector)
|
226
|
+
case term_vector
|
228
227
|
when TermVector::NO
|
229
228
|
@store_term_vector = false
|
230
229
|
@store_position = false
|
@@ -303,11 +302,11 @@ module Ferret::Document
|
|
303
302
|
str << "indexed," if (@indexed)
|
304
303
|
str << "tokenized," if (@tokenized)
|
305
304
|
str << "store_term_vector," if (@store_term_vector)
|
306
|
-
str << "
|
307
|
-
str << "
|
305
|
+
str << "store_offsets," if (@store_offset)
|
306
|
+
str << "store_positions," if (@store_position)
|
308
307
|
str << "omit_norms," if (@omit_norms)
|
309
308
|
str << "binary," if (@binary)
|
310
|
-
str << "<#{@name}:#{data}>"
|
309
|
+
str << "<#{@name}:#{@binary ? '=bin_data=' : data}>"
|
311
310
|
end
|
312
311
|
end
|
313
312
|
end
|
@@ -92,7 +92,7 @@ module Ferret::Index
|
|
92
92
|
|
93
93
|
length = @field_lengths[field_number] # length of field
|
94
94
|
position = @field_positions[field_number] # position in field
|
95
|
-
position += @analyzer.
|
95
|
+
position += @analyzer.pos_inc_gap(field_name) if length > 0
|
96
96
|
offset = @field_offsets[field_number] # offset field
|
97
97
|
|
98
98
|
if field_info.indexed?
|
@@ -120,18 +120,18 @@ module Ferret::Index
|
|
120
120
|
begin
|
121
121
|
last_token = nil
|
122
122
|
while token = stream.next
|
123
|
-
position += (token.
|
123
|
+
position += (token.pos_inc - 1)
|
124
124
|
|
125
125
|
if(field_info.store_offsets?())
|
126
126
|
add_position(field_name,
|
127
|
-
token.
|
127
|
+
token.text(),
|
128
128
|
position,
|
129
129
|
TermVectorOffsetInfo.new(
|
130
130
|
offset + token.start_offset(),
|
131
131
|
offset + token.end_offset()))
|
132
132
|
position += 1
|
133
133
|
else
|
134
|
-
add_position(field_name, token.
|
134
|
+
add_position(field_name, token.text(), position, nil)
|
135
135
|
position += 1
|
136
136
|
end
|
137
137
|
|
data/lib/ferret/index/index.rb
CHANGED
@@ -32,10 +32,15 @@ module Ferret::Index
|
|
32
32
|
# use the create_if_missing option.
|
33
33
|
# default_field:: This specifies the default field that will be
|
34
34
|
# used when you add a simple string to the index
|
35
|
-
# using #add_document
|
36
|
-
# default_search_field unless you set it
|
35
|
+
# using #add_document or <<. This will also be used
|
36
|
+
# for default_search_field unless you set it
|
37
37
|
# explicitly. The default for this value is the
|
38
|
-
#
|
38
|
+
# string "id".
|
39
|
+
# id_field: This field is as the field to search when doing
|
40
|
+
# searches on a term. For example, if you do a
|
41
|
+
# lookup by term "cat", ie index["cat"], this will
|
42
|
+
# be the field that is searched. This will default
|
43
|
+
# to default_field if not set.
|
39
44
|
# default_search_field:: This specifies the field or fields that will be
|
40
45
|
# searched by the query parser. You can use a
|
41
46
|
# string to specify one field, eg, "title". Or you
|
@@ -54,7 +59,7 @@ module Ferret::Index
|
|
54
59
|
# in-memory index which you'd like to read with
|
55
60
|
# this class. If you want to create a new index,
|
56
61
|
# you are better off passing in a path.
|
57
|
-
# close_dir:: This specifies whether you
|
62
|
+
# close_dir:: This specifies whether you want this class to
|
58
63
|
# close the index directory when this class is
|
59
64
|
# closed. This only has any meaning when you pass
|
60
65
|
# in a directory object in the *dir* option, in
|
@@ -76,7 +81,9 @@ module Ferret::Index
|
|
76
81
|
# as an existing document, the existing document will
|
77
82
|
# be replaced by the new object. This will slow
|
78
83
|
# down indexing so it should not be used if
|
79
|
-
# performance is a concern.
|
84
|
+
# performance is a concern. You must make sure that
|
85
|
+
# your key/keys are either untokenized or that they
|
86
|
+
# are not broken up by the analyzer.
|
80
87
|
# use_compound_file:: Uses a compound file to store the index. This
|
81
88
|
# prevents an error being raised for having too
|
82
89
|
# many files open at the same time. The default is
|
@@ -117,13 +124,16 @@ module Ferret::Index
|
|
117
124
|
def initialize(options = {})
|
118
125
|
super()
|
119
126
|
|
120
|
-
options[:default_search_field] &&= options[:default_search_field].to_s
|
121
127
|
options[:default_field] &&= options[:default_field].to_s
|
122
128
|
options[:create_if_missing] = true if options[:create_if_missing].nil?
|
123
129
|
@key = [options[:key]].flatten if options[:key]
|
124
130
|
|
125
131
|
if options[:path]
|
126
|
-
|
132
|
+
begin
|
133
|
+
@dir = FSDirectory.new(options[:path], options[:create])
|
134
|
+
rescue IOError => io
|
135
|
+
@dir = FSDirectory.new(options[:path], options[:create_if_missing])
|
136
|
+
end
|
127
137
|
options[:close_dir] = true
|
128
138
|
elsif options[:dir]
|
129
139
|
@dir = options[:dir]
|
@@ -145,7 +155,8 @@ module Ferret::Index
|
|
145
155
|
@auto_flush = @options[:auto_flush] || false
|
146
156
|
@default_search_field = (@options[:default_search_field] || \
|
147
157
|
@options[:default_field] || "*")
|
148
|
-
@default_field = @options[:default_field] || ""
|
158
|
+
@default_field = (@options[:default_field] || @options[:id_field] || "id").to_s
|
159
|
+
@id_field = (@options[:id_field] || @options[:default_field] || "id").to_s
|
149
160
|
@options[:handle_parse_errors] = true if @options[:handle_parse_errors].nil?
|
150
161
|
@open = true
|
151
162
|
@qp = nil
|
@@ -261,7 +272,10 @@ module Ferret::Index
|
|
261
272
|
|
262
273
|
# delete existing documents with the same key
|
263
274
|
if @key
|
264
|
-
query = @key.
|
275
|
+
query = @key.inject(BooleanQuery.new()) do |bq, field|
|
276
|
+
bq.add_query(TermQuery.new(Term.new(field, fdoc[field])),
|
277
|
+
BooleanClause::Occur::MUST)
|
278
|
+
end
|
265
279
|
query_delete(query)
|
266
280
|
end
|
267
281
|
|
@@ -317,7 +331,7 @@ module Ferret::Index
|
|
317
331
|
@dir.synchronize do
|
318
332
|
ensure_reader_open()
|
319
333
|
if id.is_a?(String)
|
320
|
-
t = Term.new(
|
334
|
+
t = Term.new(@id_field, id.to_s)
|
321
335
|
return @reader.get_document_with_term(t)
|
322
336
|
elsif id.is_a?(Term)
|
323
337
|
return @reader.get_document_with_term(id)
|
@@ -338,7 +352,7 @@ module Ferret::Index
|
|
338
352
|
cnt = 0
|
339
353
|
ensure_reader_open()
|
340
354
|
if id.is_a?(String)
|
341
|
-
t = Term.new(
|
355
|
+
t = Term.new(@id_field, id.to_s)
|
342
356
|
cnt = @reader.delete_docs_with_term(t)
|
343
357
|
elsif id.is_a?(Term)
|
344
358
|
cnt = @reader.delete_docs_with_term(id)
|
@@ -398,7 +412,7 @@ module Ferret::Index
|
|
398
412
|
document = doc(id)
|
399
413
|
if new_val.is_a?(Hash)
|
400
414
|
new_val.each_pair {|name, content| document[name] = content.to_s}
|
401
|
-
elsif new_val.is_a?(Document)
|
415
|
+
elsif new_val.is_a?(Ferret::Document::Document)
|
402
416
|
document = new_val
|
403
417
|
else
|
404
418
|
document[@options[:default_field]] = new_val.to_s
|
@@ -478,6 +492,7 @@ module Ferret::Index
|
|
478
492
|
@reader = nil
|
479
493
|
@writer = nil
|
480
494
|
@searcher = nil
|
495
|
+
@has_writes = false
|
481
496
|
end
|
482
497
|
end
|
483
498
|
|
@@ -575,27 +590,28 @@ module Ferret::Index
|
|
575
590
|
@writer = IndexWriter.new(@dir, @options)
|
576
591
|
end
|
577
592
|
|
593
|
+
# returns the new reader if one is opened
|
578
594
|
def ensure_reader_open()
|
579
595
|
raise "tried to use a closed index" if not @open
|
580
596
|
if @reader
|
581
597
|
if not @reader.latest?
|
582
|
-
@reader = IndexReader.open(@dir, false)
|
598
|
+
return @reader = IndexReader.open(@dir, false)
|
583
599
|
end
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
@
|
600
|
+
else
|
601
|
+
if @writer
|
602
|
+
@writer.close
|
603
|
+
@writer = nil
|
604
|
+
end
|
605
|
+
return @reader = IndexReader.open(@dir, false)
|
590
606
|
end
|
591
|
-
|
607
|
+
return false
|
592
608
|
end
|
593
609
|
|
594
610
|
def ensure_searcher_open()
|
595
611
|
raise "tried to use a closed index" if not @open
|
596
|
-
|
597
|
-
|
598
|
-
|
612
|
+
if ensure_reader_open() or not @searcher
|
613
|
+
@searcher = IndexSearcher.new(@reader)
|
614
|
+
end
|
599
615
|
end
|
600
616
|
|
601
617
|
private
|
@@ -28,8 +28,8 @@ module Index
|
|
28
28
|
|
29
29
|
WRITE_LOCK_TIMEOUT = 1
|
30
30
|
COMMIT_LOCK_TIMEOUT = 10
|
31
|
-
WRITE_LOCK_NAME = "write
|
32
|
-
COMMIT_LOCK_NAME = "commit
|
31
|
+
WRITE_LOCK_NAME = "write"
|
32
|
+
COMMIT_LOCK_NAME = "commit"
|
33
33
|
DEFAULT_MERGE_FACTOR = 10
|
34
34
|
DEFAULT_MIN_MERGE_DOCS = 10
|
35
35
|
DEFAULT_MAX_MERGE_DOCS = 0x7fffffff
|
@@ -4,6 +4,7 @@ module Ferret::Index
|
|
4
4
|
# @author Anders Nielsen
|
5
5
|
class MultipleTermDocPosEnum < TermDocEnum
|
6
6
|
|
7
|
+
attr_accessor :doc, :freq
|
7
8
|
class TermPositionsQueue < Ferret::Utils::PriorityQueue
|
8
9
|
def initialize(term_positions)
|
9
10
|
super(term_positions.size)
|
@@ -76,14 +77,6 @@ module Ferret::Index
|
|
76
77
|
return next?
|
77
78
|
end
|
78
79
|
|
79
|
-
def doc()
|
80
|
-
return @doc
|
81
|
-
end
|
82
|
-
|
83
|
-
def freq()
|
84
|
-
return @freq
|
85
|
-
end
|
86
|
-
|
87
80
|
def close()
|
88
81
|
while (tps = @tps_queue.pop())
|
89
82
|
tps.close()
|