ferret 0.3.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -0
- data/Rakefile +51 -25
- data/ext/analysis.c +553 -0
- data/ext/analysis.h +76 -0
- data/ext/array.c +83 -0
- data/ext/array.h +19 -0
- data/ext/bitvector.c +164 -0
- data/ext/bitvector.h +29 -0
- data/ext/compound_io.c +335 -0
- data/ext/document.c +336 -0
- data/ext/document.h +87 -0
- data/ext/ferret.c +88 -47
- data/ext/ferret.h +43 -109
- data/ext/field.c +395 -0
- data/ext/filter.c +103 -0
- data/ext/fs_store.c +352 -0
- data/ext/global.c +219 -0
- data/ext/global.h +73 -0
- data/ext/hash.c +446 -0
- data/ext/hash.h +80 -0
- data/ext/hashset.c +141 -0
- data/ext/hashset.h +37 -0
- data/ext/helper.c +11 -0
- data/ext/helper.h +5 -0
- data/ext/inc/lang.h +41 -0
- data/ext/ind.c +389 -0
- data/ext/index.h +884 -0
- data/ext/index_io.c +269 -415
- data/ext/index_rw.c +2543 -0
- data/ext/lang.c +31 -0
- data/ext/lang.h +41 -0
- data/ext/priorityqueue.c +228 -0
- data/ext/priorityqueue.h +44 -0
- data/ext/q_boolean.c +1331 -0
- data/ext/q_const_score.c +154 -0
- data/ext/q_fuzzy.c +287 -0
- data/ext/q_match_all.c +142 -0
- data/ext/q_multi_phrase.c +343 -0
- data/ext/q_parser.c +2180 -0
- data/ext/q_phrase.c +657 -0
- data/ext/q_prefix.c +75 -0
- data/ext/q_range.c +247 -0
- data/ext/q_span.c +1566 -0
- data/ext/q_term.c +308 -0
- data/ext/q_wildcard.c +146 -0
- data/ext/r_analysis.c +255 -0
- data/ext/r_doc.c +578 -0
- data/ext/r_index_io.c +996 -0
- data/ext/r_qparser.c +158 -0
- data/ext/r_search.c +2321 -0
- data/ext/r_store.c +263 -0
- data/ext/r_term.c +219 -0
- data/ext/ram_store.c +447 -0
- data/ext/search.c +524 -0
- data/ext/search.h +1065 -0
- data/ext/similarity.c +143 -39
- data/ext/sort.c +661 -0
- data/ext/store.c +35 -0
- data/ext/store.h +152 -0
- data/ext/term.c +704 -143
- data/ext/termdocs.c +599 -0
- data/ext/vector.c +594 -0
- data/lib/ferret.rb +9 -10
- data/lib/ferret/analysis/analyzers.rb +2 -2
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +14 -14
- data/lib/ferret/analysis/token_filters.rb +3 -3
- data/lib/ferret/document/field.rb +16 -17
- data/lib/ferret/index/document_writer.rb +4 -4
- data/lib/ferret/index/index.rb +39 -23
- data/lib/ferret/index/index_writer.rb +2 -2
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
- data/lib/ferret/index/segment_term_vector.rb +4 -4
- data/lib/ferret/index/term.rb +5 -1
- data/lib/ferret/index/term_vector_offset_info.rb +6 -6
- data/lib/ferret/index/term_vectors_io.rb +5 -5
- data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
- data/lib/ferret/search.rb +1 -1
- data/lib/ferret/search/boolean_query.rb +2 -1
- data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
- data/lib/ferret/search/fuzzy_query.rb +2 -1
- data/lib/ferret/search/index_searcher.rb +3 -0
- data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
- data/lib/ferret/search/multi_phrase_query.rb +6 -5
- data/lib/ferret/search/phrase_query.rb +3 -6
- data/lib/ferret/search/prefix_query.rb +4 -4
- data/lib/ferret/search/sort.rb +3 -1
- data/lib/ferret/search/sort_field.rb +9 -9
- data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
- data/lib/ferret/search/spans/span_near_query.rb +1 -1
- data/lib/ferret/search/spans/span_weight.rb +1 -1
- data/lib/ferret/search/spans/spans_enum.rb +7 -7
- data/lib/ferret/store/fs_store.rb +10 -6
- data/lib/ferret/store/ram_store.rb +3 -3
- data/lib/rferret.rb +36 -0
- data/test/functional/thread_safety_index_test.rb +2 -2
- data/test/test_helper.rb +16 -2
- data/test/unit/analysis/c_token.rb +25 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
- data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
- data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
- data/test/unit/document/c_field.rb +98 -0
- data/test/unit/document/tc_field.rb +0 -66
- data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
- data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
- data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
- data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
- data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
- data/test/unit/index/tc_segment_term_vector.rb +2 -2
- data/test/unit/index/tc_term_vectors_io.rb +4 -4
- data/test/unit/query_parser/c_query_parser.rb +138 -0
- data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
- data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
- data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
- data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
- data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
- data/test/unit/search/c_sort_field.rb +27 -0
- data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +7 -20
- data/test/unit/store/c_fs_store.rb +76 -0
- data/test/unit/store/c_ram_store.rb +35 -0
- data/test/unit/store/m_store.rb +34 -0
- data/test/unit/store/m_store_lock.rb +68 -0
- data/test/unit/store/tc_fs_store.rb +0 -53
- data/test/unit/store/tc_ram_store.rb +0 -20
- data/test/unit/store/tm_store.rb +0 -30
- data/test/unit/store/tm_store_lock.rb +0 -66
- metadata +84 -31
- data/ext/Makefile +0 -140
- data/ext/ferret_ext.so +0 -0
- data/ext/priority_queue.c +0 -232
- data/ext/ram_directory.c +0 -321
- data/ext/segment_merge_queue.c +0 -37
- data/ext/segment_term_enum.c +0 -326
- data/ext/string_helper.c +0 -42
- data/ext/tags +0 -344
- data/ext/term_buffer.c +0 -230
- data/ext/term_infos_reader.c +0 -54
- data/ext/terminfo.c +0 -160
- data/ext/token.c +0 -93
- data/ext/util.c +0 -12
data/lib/ferret.rb
CHANGED
@@ -22,20 +22,19 @@
|
|
22
22
|
#++
|
23
23
|
# :include: ../TUTORIAL
|
24
24
|
module Ferret
|
25
|
-
VERSION = '0.
|
25
|
+
VERSION = '0.9.0'
|
26
26
|
end
|
27
27
|
|
28
|
-
require 'ferret/utils'
|
29
|
-
require 'ferret/document'
|
30
|
-
require 'ferret/stemmers'
|
31
|
-
require 'ferret/analysis'
|
32
|
-
require 'ferret/store'
|
33
|
-
require 'ferret/index'
|
34
|
-
require 'ferret/search'
|
35
|
-
require 'ferret/query_parser'
|
36
|
-
|
37
28
|
# try and load the C extension but it isn't necessary.
|
38
29
|
begin
|
39
30
|
require 'ferret_ext'
|
40
31
|
rescue Exception => e
|
32
|
+
require 'ferret/utils'
|
33
|
+
require 'ferret/document'
|
34
|
+
require 'ferret/stemmers'
|
35
|
+
require 'ferret/analysis'
|
36
|
+
require 'ferret/store'
|
37
|
+
require 'ferret/index'
|
38
|
+
require 'ferret/search'
|
39
|
+
require 'ferret/query_parser'
|
41
40
|
end
|
@@ -28,10 +28,10 @@ module Ferret::Analysis
|
|
28
28
|
# exact PhraseQuery matches, for instance, across Field instance boundaries.
|
29
29
|
#
|
30
30
|
# field_name:: Field name being indexed.
|
31
|
-
#
|
31
|
+
# pos_inc_gap:: added to the next token emitted from
|
32
32
|
# #token_stream(String,Reader)
|
33
33
|
#
|
34
|
-
def
|
34
|
+
def pos_inc_gap(field_name)
|
35
35
|
return 0
|
36
36
|
end
|
37
37
|
|
@@ -64,7 +64,7 @@ if __FILE__ == $0
|
|
64
64
|
$stdin.each do |line|
|
65
65
|
stk = Ferret::Analysis::StandardTokenizer.new(line)
|
66
66
|
while tk = stk.next()
|
67
|
-
puts " <" + tk.
|
67
|
+
puts " <" + tk.text + "> from " + tk.start_offset.to_s + " to " + tk.end_offset.to_s
|
68
68
|
end
|
69
69
|
end
|
70
70
|
end
|
@@ -18,25 +18,25 @@ module Ferret::Analysis
|
|
18
18
|
# end_offset:: is equal to one greater than the position of the last
|
19
19
|
# character corresponding to this token. Note that the
|
20
20
|
# difference between @end_offset and @start_offset may not be
|
21
|
-
# equal to @
|
21
|
+
# equal to @text.length(), as the term text may have been
|
22
22
|
# altered by a stemmer or some other filter.
|
23
23
|
class Token
|
24
24
|
include Comparable
|
25
|
-
attr_accessor :
|
26
|
-
attr_reader :
|
25
|
+
attr_accessor :text
|
26
|
+
attr_reader :pos_inc, :start_offset, :end_offset, :type
|
27
27
|
|
28
28
|
# Constructs a Token with the given term text, and start & end offsets.
|
29
29
|
# The type defaults to "word."
|
30
|
-
def initialize(txt, so, eo, typ="word"
|
31
|
-
@
|
30
|
+
def initialize(txt, so, eo, pos_inc=1, typ="word")
|
31
|
+
@text = txt
|
32
32
|
@start_offset = so
|
33
33
|
@end_offset = eo
|
34
34
|
@type = typ # lexical type
|
35
|
-
@
|
35
|
+
@pos_inc = pos_inc
|
36
36
|
end
|
37
37
|
|
38
38
|
def set!(txt, so, eo)
|
39
|
-
@
|
39
|
+
@text = txt
|
40
40
|
@start_offset = so
|
41
41
|
@end_offset = eo
|
42
42
|
self
|
@@ -44,20 +44,20 @@ module Ferret::Analysis
|
|
44
44
|
|
45
45
|
def eql?(o)
|
46
46
|
return (o.instance_of?(Token) and @start_offset == o.start_offset and
|
47
|
-
@end_offset == o.end_offset and @
|
47
|
+
@end_offset == o.end_offset and @text == o.text)
|
48
48
|
end
|
49
49
|
alias :== :eql?
|
50
50
|
|
51
51
|
# Tokens are sorted by the position in the text at which they occur, ie
|
52
52
|
# the start_offset. If two tokens have the same start offset, (see
|
53
|
-
#
|
53
|
+
# pos_inc=) then, they are sorted by the end_offset and then
|
54
54
|
# lexically by the token text.
|
55
55
|
def <=>(o)
|
56
56
|
r = @start_offset <=> o.start_offset
|
57
57
|
return r if r != 0
|
58
58
|
r = @end_offset <=> o.end_offset
|
59
59
|
return r if r != 0
|
60
|
-
r = @
|
60
|
+
r = @text <=> o.text
|
61
61
|
return r
|
62
62
|
end
|
63
63
|
|
@@ -82,17 +82,17 @@ module Ferret::Analysis
|
|
82
82
|
# words and also sets the increment to the number of stop words removed
|
83
83
|
# before each non-stop word. Then exact phrase queries will only match
|
84
84
|
# when the terms occur with no intervening stop words.
|
85
|
-
def
|
85
|
+
def pos_inc=(pos_inc)
|
86
86
|
if (pos_inc < 0)
|
87
87
|
raise ArgumentError, "Increment must be zero or greater: " + pos_inc
|
88
88
|
end
|
89
|
-
@
|
89
|
+
@pos_inc = pos_inc
|
90
90
|
end
|
91
91
|
|
92
92
|
# Returns a string representation of the token with all the attributes.
|
93
93
|
def to_s
|
94
|
-
buf = "#{
|
95
|
-
buf << "(pos_inc=#{@
|
94
|
+
buf = "#{text}:#{start_offset}->#{end_offset}"
|
95
|
+
buf << "(pos_inc=#{@pos_inc})" if (@pos_inc != 1)
|
96
96
|
buf << "(type=#{@type})" if (@type != "word")
|
97
97
|
buf
|
98
98
|
end
|
@@ -24,7 +24,7 @@ module Ferret::Analysis
|
|
24
24
|
return nil
|
25
25
|
end
|
26
26
|
|
27
|
-
t.
|
27
|
+
t.text = t.text.downcase()
|
28
28
|
|
29
29
|
return t
|
30
30
|
end
|
@@ -50,7 +50,7 @@ module Ferret::Analysis
|
|
50
50
|
def next()
|
51
51
|
# return the first non-stop word found
|
52
52
|
while token = @input.next()
|
53
|
-
return token if ! @stop_set.include?(token.
|
53
|
+
return token if ! @stop_set.include?(token.text)
|
54
54
|
end
|
55
55
|
return nil
|
56
56
|
end
|
@@ -78,7 +78,7 @@ module Ferret::Analysis
|
|
78
78
|
if (token == nil)
|
79
79
|
return nil
|
80
80
|
else
|
81
|
-
token.
|
81
|
+
token.text = Stemmable.stem_porter(token.text)
|
82
82
|
end
|
83
83
|
token
|
84
84
|
end
|
@@ -27,8 +27,7 @@ module Ferret::Document
|
|
27
27
|
# Documents returned from IndexReader#document(int) and
|
28
28
|
# Hits#doc(int) may thus not have the same value present as when this field
|
29
29
|
# was indexed.
|
30
|
-
attr_accessor :boost, :data
|
31
|
-
|
30
|
+
attr_accessor :boost, :data
|
32
31
|
attr_reader :name
|
33
32
|
|
34
33
|
# True iff the value of the field is to be stored in the index for
|
@@ -160,34 +159,34 @@ module Ferret::Document
|
|
160
159
|
# this field more important.
|
161
160
|
def initialize(name,
|
162
161
|
value,
|
163
|
-
|
162
|
+
store = Store::YES,
|
164
163
|
index = Index::UNTOKENIZED,
|
165
|
-
|
164
|
+
term_vector = TermVector::NO,
|
166
165
|
binary = false,
|
167
166
|
boost = 1.0)
|
168
|
-
if (index == Index::NO and
|
167
|
+
if (index == Index::NO and store == Store::NO)
|
169
168
|
raise ArgumentError, "it doesn't make sense to have a field that " +
|
170
169
|
"is neither indexed nor stored"
|
171
170
|
end
|
172
|
-
if (index == Index::NO &&
|
171
|
+
if (index == Index::NO && term_vector != TermVector::NO)
|
173
172
|
raise ArgumentError, "cannot store term vector information for a " +
|
174
173
|
"field that is not indexed"
|
175
174
|
end
|
176
175
|
|
177
176
|
# The name of the field (e.g., "date", "subject", "title", or "body")
|
178
|
-
@name = name
|
177
|
+
@name = name.to_s
|
179
178
|
|
180
179
|
# the one and only data object for all different kind of field values
|
181
180
|
@data = value
|
182
|
-
self.
|
181
|
+
self.store = store
|
183
182
|
self.index = index
|
184
|
-
self.
|
183
|
+
self.term_vector = term_vector
|
185
184
|
@binary = binary
|
186
185
|
@boost = boost
|
187
186
|
end
|
188
187
|
|
189
|
-
def
|
190
|
-
case
|
188
|
+
def store=(store)
|
189
|
+
case store
|
191
190
|
when Store::YES
|
192
191
|
@stored = true
|
193
192
|
@compressed = false
|
@@ -198,7 +197,7 @@ module Ferret::Document
|
|
198
197
|
@stored = false
|
199
198
|
@compressed = false
|
200
199
|
else
|
201
|
-
raise "unknown stored parameter " +
|
200
|
+
raise "unknown stored parameter " + store.to_s
|
202
201
|
end
|
203
202
|
end
|
204
203
|
|
@@ -223,8 +222,8 @@ module Ferret::Document
|
|
223
222
|
end
|
224
223
|
end
|
225
224
|
|
226
|
-
def
|
227
|
-
case
|
225
|
+
def term_vector=(term_vector)
|
226
|
+
case term_vector
|
228
227
|
when TermVector::NO
|
229
228
|
@store_term_vector = false
|
230
229
|
@store_position = false
|
@@ -303,11 +302,11 @@ module Ferret::Document
|
|
303
302
|
str << "indexed," if (@indexed)
|
304
303
|
str << "tokenized," if (@tokenized)
|
305
304
|
str << "store_term_vector," if (@store_term_vector)
|
306
|
-
str << "
|
307
|
-
str << "
|
305
|
+
str << "store_offsets," if (@store_offset)
|
306
|
+
str << "store_positions," if (@store_position)
|
308
307
|
str << "omit_norms," if (@omit_norms)
|
309
308
|
str << "binary," if (@binary)
|
310
|
-
str << "<#{@name}:#{data}>"
|
309
|
+
str << "<#{@name}:#{@binary ? '=bin_data=' : data}>"
|
311
310
|
end
|
312
311
|
end
|
313
312
|
end
|
@@ -92,7 +92,7 @@ module Ferret::Index
|
|
92
92
|
|
93
93
|
length = @field_lengths[field_number] # length of field
|
94
94
|
position = @field_positions[field_number] # position in field
|
95
|
-
position += @analyzer.
|
95
|
+
position += @analyzer.pos_inc_gap(field_name) if length > 0
|
96
96
|
offset = @field_offsets[field_number] # offset field
|
97
97
|
|
98
98
|
if field_info.indexed?
|
@@ -120,18 +120,18 @@ module Ferret::Index
|
|
120
120
|
begin
|
121
121
|
last_token = nil
|
122
122
|
while token = stream.next
|
123
|
-
position += (token.
|
123
|
+
position += (token.pos_inc - 1)
|
124
124
|
|
125
125
|
if(field_info.store_offsets?())
|
126
126
|
add_position(field_name,
|
127
|
-
token.
|
127
|
+
token.text(),
|
128
128
|
position,
|
129
129
|
TermVectorOffsetInfo.new(
|
130
130
|
offset + token.start_offset(),
|
131
131
|
offset + token.end_offset()))
|
132
132
|
position += 1
|
133
133
|
else
|
134
|
-
add_position(field_name, token.
|
134
|
+
add_position(field_name, token.text(), position, nil)
|
135
135
|
position += 1
|
136
136
|
end
|
137
137
|
|
data/lib/ferret/index/index.rb
CHANGED
@@ -32,10 +32,15 @@ module Ferret::Index
|
|
32
32
|
# use the create_if_missing option.
|
33
33
|
# default_field:: This specifies the default field that will be
|
34
34
|
# used when you add a simple string to the index
|
35
|
-
# using #add_document
|
36
|
-
# default_search_field unless you set it
|
35
|
+
# using #add_document or <<. This will also be used
|
36
|
+
# for default_search_field unless you set it
|
37
37
|
# explicitly. The default for this value is the
|
38
|
-
#
|
38
|
+
# string "id".
|
39
|
+
# id_field:: This field is used as the field to search when doing
|
40
|
+
# searches on a term. For example, if you do a
|
41
|
+
# lookup by term "cat", ie index["cat"], this will
|
42
|
+
# be the field that is searched. This will default
|
43
|
+
# to default_field if not set.
|
39
44
|
# default_search_field:: This specifies the field or fields that will be
|
40
45
|
# searched by the query parser. You can use a
|
41
46
|
# string to specify one field, eg, "title". Or you
|
@@ -54,7 +59,7 @@ module Ferret::Index
|
|
54
59
|
# in-memory index which you'd like to read with
|
55
60
|
# this class. If you want to create a new index,
|
56
61
|
# you are better off passing in a path.
|
57
|
-
# close_dir:: This specifies whether you
|
62
|
+
# close_dir:: This specifies whether you want this class to
|
58
63
|
# close the index directory when this class is
|
59
64
|
# closed. This only has any meaning when you pass
|
60
65
|
# in a directory object in the *dir* option, in
|
@@ -76,7 +81,9 @@ module Ferret::Index
|
|
76
81
|
# as an existing document, the existing document will
|
77
82
|
# be replaced by the new object. This will slow
|
78
83
|
# down indexing so it should not be used if
|
79
|
-
# performance is a concern.
|
84
|
+
# performance is a concern. You must make sure that
|
85
|
+
# your key/keys are either untokenized or that they
|
86
|
+
# are not broken up by the analyzer.
|
80
87
|
# use_compound_file:: Uses a compound file to store the index. This
|
81
88
|
# prevents an error being raised for having too
|
82
89
|
# many files open at the same time. The default is
|
@@ -117,13 +124,16 @@ module Ferret::Index
|
|
117
124
|
def initialize(options = {})
|
118
125
|
super()
|
119
126
|
|
120
|
-
options[:default_search_field] &&= options[:default_search_field].to_s
|
121
127
|
options[:default_field] &&= options[:default_field].to_s
|
122
128
|
options[:create_if_missing] = true if options[:create_if_missing].nil?
|
123
129
|
@key = [options[:key]].flatten if options[:key]
|
124
130
|
|
125
131
|
if options[:path]
|
126
|
-
|
132
|
+
begin
|
133
|
+
@dir = FSDirectory.new(options[:path], options[:create])
|
134
|
+
rescue IOError => io
|
135
|
+
@dir = FSDirectory.new(options[:path], options[:create_if_missing])
|
136
|
+
end
|
127
137
|
options[:close_dir] = true
|
128
138
|
elsif options[:dir]
|
129
139
|
@dir = options[:dir]
|
@@ -145,7 +155,8 @@ module Ferret::Index
|
|
145
155
|
@auto_flush = @options[:auto_flush] || false
|
146
156
|
@default_search_field = (@options[:default_search_field] || \
|
147
157
|
@options[:default_field] || "*")
|
148
|
-
@default_field = @options[:default_field] || ""
|
158
|
+
@default_field = (@options[:default_field] || @options[:id_field] || "id").to_s
|
159
|
+
@id_field = (@options[:id_field] || @options[:default_field] || "id").to_s
|
149
160
|
@options[:handle_parse_errors] = true if @options[:handle_parse_errors].nil?
|
150
161
|
@open = true
|
151
162
|
@qp = nil
|
@@ -261,7 +272,10 @@ module Ferret::Index
|
|
261
272
|
|
262
273
|
# delete existing documents with the same key
|
263
274
|
if @key
|
264
|
-
query = @key.
|
275
|
+
query = @key.inject(BooleanQuery.new()) do |bq, field|
|
276
|
+
bq.add_query(TermQuery.new(Term.new(field, fdoc[field])),
|
277
|
+
BooleanClause::Occur::MUST)
|
278
|
+
end
|
265
279
|
query_delete(query)
|
266
280
|
end
|
267
281
|
|
@@ -317,7 +331,7 @@ module Ferret::Index
|
|
317
331
|
@dir.synchronize do
|
318
332
|
ensure_reader_open()
|
319
333
|
if id.is_a?(String)
|
320
|
-
t = Term.new(
|
334
|
+
t = Term.new(@id_field, id.to_s)
|
321
335
|
return @reader.get_document_with_term(t)
|
322
336
|
elsif id.is_a?(Term)
|
323
337
|
return @reader.get_document_with_term(id)
|
@@ -338,7 +352,7 @@ module Ferret::Index
|
|
338
352
|
cnt = 0
|
339
353
|
ensure_reader_open()
|
340
354
|
if id.is_a?(String)
|
341
|
-
t = Term.new(
|
355
|
+
t = Term.new(@id_field, id.to_s)
|
342
356
|
cnt = @reader.delete_docs_with_term(t)
|
343
357
|
elsif id.is_a?(Term)
|
344
358
|
cnt = @reader.delete_docs_with_term(id)
|
@@ -398,7 +412,7 @@ module Ferret::Index
|
|
398
412
|
document = doc(id)
|
399
413
|
if new_val.is_a?(Hash)
|
400
414
|
new_val.each_pair {|name, content| document[name] = content.to_s}
|
401
|
-
elsif new_val.is_a?(Document)
|
415
|
+
elsif new_val.is_a?(Ferret::Document::Document)
|
402
416
|
document = new_val
|
403
417
|
else
|
404
418
|
document[@options[:default_field]] = new_val.to_s
|
@@ -478,6 +492,7 @@ module Ferret::Index
|
|
478
492
|
@reader = nil
|
479
493
|
@writer = nil
|
480
494
|
@searcher = nil
|
495
|
+
@has_writes = false
|
481
496
|
end
|
482
497
|
end
|
483
498
|
|
@@ -575,27 +590,28 @@ module Ferret::Index
|
|
575
590
|
@writer = IndexWriter.new(@dir, @options)
|
576
591
|
end
|
577
592
|
|
593
|
+
# returns the new reader if one is opened
|
578
594
|
def ensure_reader_open()
|
579
595
|
raise "tried to use a closed index" if not @open
|
580
596
|
if @reader
|
581
597
|
if not @reader.latest?
|
582
|
-
@reader = IndexReader.open(@dir, false)
|
598
|
+
return @reader = IndexReader.open(@dir, false)
|
583
599
|
end
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
@
|
600
|
+
else
|
601
|
+
if @writer
|
602
|
+
@writer.close
|
603
|
+
@writer = nil
|
604
|
+
end
|
605
|
+
return @reader = IndexReader.open(@dir, false)
|
590
606
|
end
|
591
|
-
|
607
|
+
return false
|
592
608
|
end
|
593
609
|
|
594
610
|
def ensure_searcher_open()
|
595
611
|
raise "tried to use a closed index" if not @open
|
596
|
-
|
597
|
-
|
598
|
-
|
612
|
+
if ensure_reader_open() or not @searcher
|
613
|
+
@searcher = IndexSearcher.new(@reader)
|
614
|
+
end
|
599
615
|
end
|
600
616
|
|
601
617
|
private
|
@@ -28,8 +28,8 @@ module Index
|
|
28
28
|
|
29
29
|
WRITE_LOCK_TIMEOUT = 1
|
30
30
|
COMMIT_LOCK_TIMEOUT = 10
|
31
|
-
WRITE_LOCK_NAME = "write
|
32
|
-
COMMIT_LOCK_NAME = "commit
|
31
|
+
WRITE_LOCK_NAME = "write"
|
32
|
+
COMMIT_LOCK_NAME = "commit"
|
33
33
|
DEFAULT_MERGE_FACTOR = 10
|
34
34
|
DEFAULT_MIN_MERGE_DOCS = 10
|
35
35
|
DEFAULT_MAX_MERGE_DOCS = 0x7fffffff
|
@@ -4,6 +4,7 @@ module Ferret::Index
|
|
4
4
|
# @author Anders Nielsen
|
5
5
|
class MultipleTermDocPosEnum < TermDocEnum
|
6
6
|
|
7
|
+
attr_accessor :doc, :freq
|
7
8
|
class TermPositionsQueue < Ferret::Utils::PriorityQueue
|
8
9
|
def initialize(term_positions)
|
9
10
|
super(term_positions.size)
|
@@ -76,14 +77,6 @@ module Ferret::Index
|
|
76
77
|
return next?
|
77
78
|
end
|
78
79
|
|
79
|
-
def doc()
|
80
|
-
return @doc
|
81
|
-
end
|
82
|
-
|
83
|
-
def freq()
|
84
|
-
return @freq
|
85
|
-
end
|
86
|
-
|
87
80
|
def close()
|
88
81
|
while (tps = @tps_queue.pop())
|
89
82
|
tps.close()
|