ferret 0.3.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -0
- data/Rakefile +51 -25
- data/ext/analysis.c +553 -0
- data/ext/analysis.h +76 -0
- data/ext/array.c +83 -0
- data/ext/array.h +19 -0
- data/ext/bitvector.c +164 -0
- data/ext/bitvector.h +29 -0
- data/ext/compound_io.c +335 -0
- data/ext/document.c +336 -0
- data/ext/document.h +87 -0
- data/ext/ferret.c +88 -47
- data/ext/ferret.h +43 -109
- data/ext/field.c +395 -0
- data/ext/filter.c +103 -0
- data/ext/fs_store.c +352 -0
- data/ext/global.c +219 -0
- data/ext/global.h +73 -0
- data/ext/hash.c +446 -0
- data/ext/hash.h +80 -0
- data/ext/hashset.c +141 -0
- data/ext/hashset.h +37 -0
- data/ext/helper.c +11 -0
- data/ext/helper.h +5 -0
- data/ext/inc/lang.h +41 -0
- data/ext/ind.c +389 -0
- data/ext/index.h +884 -0
- data/ext/index_io.c +269 -415
- data/ext/index_rw.c +2543 -0
- data/ext/lang.c +31 -0
- data/ext/lang.h +41 -0
- data/ext/priorityqueue.c +228 -0
- data/ext/priorityqueue.h +44 -0
- data/ext/q_boolean.c +1331 -0
- data/ext/q_const_score.c +154 -0
- data/ext/q_fuzzy.c +287 -0
- data/ext/q_match_all.c +142 -0
- data/ext/q_multi_phrase.c +343 -0
- data/ext/q_parser.c +2180 -0
- data/ext/q_phrase.c +657 -0
- data/ext/q_prefix.c +75 -0
- data/ext/q_range.c +247 -0
- data/ext/q_span.c +1566 -0
- data/ext/q_term.c +308 -0
- data/ext/q_wildcard.c +146 -0
- data/ext/r_analysis.c +255 -0
- data/ext/r_doc.c +578 -0
- data/ext/r_index_io.c +996 -0
- data/ext/r_qparser.c +158 -0
- data/ext/r_search.c +2321 -0
- data/ext/r_store.c +263 -0
- data/ext/r_term.c +219 -0
- data/ext/ram_store.c +447 -0
- data/ext/search.c +524 -0
- data/ext/search.h +1065 -0
- data/ext/similarity.c +143 -39
- data/ext/sort.c +661 -0
- data/ext/store.c +35 -0
- data/ext/store.h +152 -0
- data/ext/term.c +704 -143
- data/ext/termdocs.c +599 -0
- data/ext/vector.c +594 -0
- data/lib/ferret.rb +9 -10
- data/lib/ferret/analysis/analyzers.rb +2 -2
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +14 -14
- data/lib/ferret/analysis/token_filters.rb +3 -3
- data/lib/ferret/document/field.rb +16 -17
- data/lib/ferret/index/document_writer.rb +4 -4
- data/lib/ferret/index/index.rb +39 -23
- data/lib/ferret/index/index_writer.rb +2 -2
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
- data/lib/ferret/index/segment_term_vector.rb +4 -4
- data/lib/ferret/index/term.rb +5 -1
- data/lib/ferret/index/term_vector_offset_info.rb +6 -6
- data/lib/ferret/index/term_vectors_io.rb +5 -5
- data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
- data/lib/ferret/search.rb +1 -1
- data/lib/ferret/search/boolean_query.rb +2 -1
- data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
- data/lib/ferret/search/fuzzy_query.rb +2 -1
- data/lib/ferret/search/index_searcher.rb +3 -0
- data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
- data/lib/ferret/search/multi_phrase_query.rb +6 -5
- data/lib/ferret/search/phrase_query.rb +3 -6
- data/lib/ferret/search/prefix_query.rb +4 -4
- data/lib/ferret/search/sort.rb +3 -1
- data/lib/ferret/search/sort_field.rb +9 -9
- data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
- data/lib/ferret/search/spans/span_near_query.rb +1 -1
- data/lib/ferret/search/spans/span_weight.rb +1 -1
- data/lib/ferret/search/spans/spans_enum.rb +7 -7
- data/lib/ferret/store/fs_store.rb +10 -6
- data/lib/ferret/store/ram_store.rb +3 -3
- data/lib/rferret.rb +36 -0
- data/test/functional/thread_safety_index_test.rb +2 -2
- data/test/test_helper.rb +16 -2
- data/test/unit/analysis/c_token.rb +25 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
- data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
- data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
- data/test/unit/document/c_field.rb +98 -0
- data/test/unit/document/tc_field.rb +0 -66
- data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
- data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
- data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
- data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
- data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
- data/test/unit/index/tc_segment_term_vector.rb +2 -2
- data/test/unit/index/tc_term_vectors_io.rb +4 -4
- data/test/unit/query_parser/c_query_parser.rb +138 -0
- data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
- data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
- data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
- data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
- data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
- data/test/unit/search/c_sort_field.rb +27 -0
- data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +7 -20
- data/test/unit/store/c_fs_store.rb +76 -0
- data/test/unit/store/c_ram_store.rb +35 -0
- data/test/unit/store/m_store.rb +34 -0
- data/test/unit/store/m_store_lock.rb +68 -0
- data/test/unit/store/tc_fs_store.rb +0 -53
- data/test/unit/store/tc_ram_store.rb +0 -20
- data/test/unit/store/tm_store.rb +0 -30
- data/test/unit/store/tm_store_lock.rb +0 -66
- metadata +84 -31
- data/ext/Makefile +0 -140
- data/ext/ferret_ext.so +0 -0
- data/ext/priority_queue.c +0 -232
- data/ext/ram_directory.c +0 -321
- data/ext/segment_merge_queue.c +0 -37
- data/ext/segment_term_enum.c +0 -326
- data/ext/string_helper.c +0 -42
- data/ext/tags +0 -344
- data/ext/term_buffer.c +0 -230
- data/ext/term_infos_reader.c +0 -54
- data/ext/terminfo.c +0 -160
- data/ext/token.c +0 -93
- data/ext/util.c +0 -12
@@ -6,14 +6,14 @@ module Ferret::Index
|
|
6
6
|
# to the terms in the array obtained from _terms_
|
7
7
|
# method. Each location in the array contains the number of times this
|
8
8
|
# term occurs in the document or the document field.
|
9
|
-
attr_reader :
|
9
|
+
attr_reader :freqs, :positions, :offsets
|
10
10
|
|
11
11
|
attr_reader :field, :terms
|
12
12
|
|
13
|
-
def initialize(field, terms,
|
13
|
+
def initialize(field, terms, freqs, positions=nil, offsets=nil)
|
14
14
|
@field = field
|
15
15
|
@terms = terms
|
16
|
-
@
|
16
|
+
@freqs = freqs
|
17
17
|
@positions = positions
|
18
18
|
@offsets = offsets
|
19
19
|
end
|
@@ -23,7 +23,7 @@ module Ferret::Index
|
|
23
23
|
if @terms
|
24
24
|
terms.each_with_index do |term, i|
|
25
25
|
sb << ', ' if i > 0
|
26
|
-
sb << term + '/' + @
|
26
|
+
sb << term + '/' + @freqs[i].to_s
|
27
27
|
end
|
28
28
|
end
|
29
29
|
sb << 'end'
|
data/lib/ferret/index/term.rb
CHANGED
@@ -19,7 +19,7 @@ module Ferret::Index
|
|
19
19
|
# Constructs a Term with the given field and text
|
20
20
|
def initialize(fld_name, txt)
|
21
21
|
@field = fld_name
|
22
|
-
@text = txt
|
22
|
+
@text = txt.to_s
|
23
23
|
end
|
24
24
|
|
25
25
|
# Combines the hash() of the field and the text.
|
@@ -42,6 +42,10 @@ module Ferret::Index
|
|
42
42
|
initialize(fld_name, txt)
|
43
43
|
end
|
44
44
|
|
45
|
+
def text=(text)
|
46
|
+
@text = text.to_s
|
47
|
+
end
|
48
|
+
|
45
49
|
def to_s
|
46
50
|
@field + ":" + @text
|
47
51
|
end
|
@@ -1,20 +1,20 @@
|
|
1
1
|
module Ferret::Index
|
2
2
|
class TermVectorOffsetInfo
|
3
|
-
attr_accessor :
|
3
|
+
attr_accessor :start, :end
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@
|
7
|
-
@
|
5
|
+
def initialize(start, endd)
|
6
|
+
@end = endd
|
7
|
+
@start = start
|
8
8
|
end
|
9
9
|
|
10
10
|
def eql?(o)
|
11
11
|
return false if !o.instance_of?(TermVectorOffsetInfo)
|
12
|
-
@
|
12
|
+
@end == o.end and @start == o.start
|
13
13
|
end
|
14
14
|
alias :== :eql?
|
15
15
|
|
16
16
|
def hash()
|
17
|
-
29 * @
|
17
|
+
29 * @start + @end
|
18
18
|
end
|
19
19
|
end
|
20
20
|
end
|
@@ -145,7 +145,7 @@ module Ferret::Index
|
|
145
145
|
|
146
146
|
vector.size.times do |j|
|
147
147
|
add_term_internal(vector.terms[j],
|
148
|
-
vector.
|
148
|
+
vector.freqs[j],
|
149
149
|
store_positions ? vector.positions[j] : nil,
|
150
150
|
store_offsets ? vector.offsets[j] : nil)
|
151
151
|
end
|
@@ -247,11 +247,11 @@ module Ferret::Index
|
|
247
247
|
# use delta encoding for offsets
|
248
248
|
position = 0
|
249
249
|
term.freq.times do |j|
|
250
|
-
@tvf.write_vint(term.offsets[j].
|
250
|
+
@tvf.write_vint(term.offsets[j].start - position)
|
251
251
|
#Save the diff between the two.
|
252
|
-
@tvf.write_vint(term.offsets[j].
|
253
|
-
term.offsets[j].
|
254
|
-
position = term.offsets[j].
|
252
|
+
@tvf.write_vint(term.offsets[j].end -
|
253
|
+
term.offsets[j].start)
|
254
|
+
position = term.offsets[j].end()
|
255
255
|
end
|
256
256
|
end
|
257
257
|
end
|
@@ -11,7 +11,7 @@ module Ferret
|
|
11
11
|
|
12
12
|
class QueryParser < Racc::Parser
|
13
13
|
|
14
|
-
module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..
|
14
|
+
module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id0396ae54ac', 'lib/ferret/query_parser/query_parser.y', 126
|
15
15
|
attr_accessor :default_field, :fields, :handle_parse_errors
|
16
16
|
|
17
17
|
def initialize(default_field = "*", options = {})
|
@@ -170,22 +170,23 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id6e7f6ac20
|
|
170
170
|
end
|
171
171
|
|
172
172
|
def get_bad_query(field, str)
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
173
|
+
get_term_query(field, str)
|
174
|
+
#tokens = []
|
175
|
+
#stream = @analyzer.token_stream(field, str)
|
176
|
+
#while token = stream.next
|
177
|
+
# tokens << token
|
178
|
+
#end
|
179
|
+
#if tokens.length == 0
|
180
|
+
# return TermQuery.new(Term.new(field, ""))
|
181
|
+
#elsif tokens.length == 1
|
182
|
+
# return TermQuery.new(Term.new(field, tokens[0].text))
|
183
|
+
#else
|
184
|
+
# bq = BooleanQuery.new()
|
185
|
+
# tokens.each do |token|
|
186
|
+
# bq << BooleanClause.new(TermQuery.new(Term.new(field, token.text)))
|
187
|
+
# end
|
188
|
+
# return bq
|
189
|
+
#end
|
189
190
|
end
|
190
191
|
|
191
192
|
def get_range_query(field, start_word, end_word, inc_upper, inc_lower)
|
@@ -201,11 +202,11 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id6e7f6ac20
|
|
201
202
|
if tokens.length == 0
|
202
203
|
return TermQuery.new(Term.new(field, ""))
|
203
204
|
elsif tokens.length == 1
|
204
|
-
return TermQuery.new(Term.new(field, tokens[0].
|
205
|
+
return TermQuery.new(Term.new(field, tokens[0].text))
|
205
206
|
else
|
206
207
|
pq = PhraseQuery.new()
|
207
208
|
tokens.each do |token|
|
208
|
-
pq.add(Term.new(field, token.
|
209
|
+
pq.add(Term.new(field, token.text), nil, token.pos_inc)
|
209
210
|
end
|
210
211
|
return pq
|
211
212
|
end
|
@@ -216,9 +217,9 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id6e7f6ac20
|
|
216
217
|
stream = @analyzer.token_stream(field, word)
|
217
218
|
if token = stream.next # only makes sense to look at one term for fuzzy
|
218
219
|
if min_sim
|
219
|
-
return FuzzyQuery.new(Term.new(field, token.
|
220
|
+
return FuzzyQuery.new(Term.new(field, token.text), min_sim.to_f)
|
220
221
|
else
|
221
|
-
return FuzzyQuery.new(Term.new(field, token.
|
222
|
+
return FuzzyQuery.new(Term.new(field, token.text))
|
222
223
|
end
|
223
224
|
else
|
224
225
|
return TermQuery.new(Term.new(field, ""))
|
@@ -254,8 +255,8 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id6e7f6ac20
|
|
254
255
|
tokens << token
|
255
256
|
end
|
256
257
|
tokens.each do |token|
|
257
|
-
pq.add(Term.new(field, token.
|
258
|
-
token.
|
258
|
+
pq.add(Term.new(field, token.text), nil,
|
259
|
+
token.pos_inc + pos_inc)
|
259
260
|
pos_inc = 0
|
260
261
|
end
|
261
262
|
end
|
@@ -278,7 +279,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id6e7f6ac20
|
|
278
279
|
position.each do |word|
|
279
280
|
stream = @analyzer.token_stream(field, word)
|
280
281
|
if token = stream.next # only put one term per word
|
281
|
-
terms << Term.new(field, token.
|
282
|
+
terms << Term.new(field, token.text)
|
282
283
|
end
|
283
284
|
end
|
284
285
|
mpq.add(terms, nil, pos_inc + 1) # must go at least one forward
|
@@ -290,8 +291,8 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id6e7f6ac20
|
|
290
291
|
tokens << token
|
291
292
|
end
|
292
293
|
tokens.each do |token|
|
293
|
-
mpq.add([Term.new(field, token.
|
294
|
-
token.
|
294
|
+
mpq.add([Term.new(field, token.text)], nil,
|
295
|
+
token.pos_inc + pos_inc)
|
295
296
|
pos_inc = 0
|
296
297
|
end
|
297
298
|
end
|
@@ -300,8 +301,15 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id6e7f6ac20
|
|
300
301
|
end
|
301
302
|
|
302
303
|
def get_phrase_query(positions, slop = nil)
|
303
|
-
if positions.size == 1
|
304
|
-
|
304
|
+
if positions.size == 1
|
305
|
+
if positions[0].is_a?(Array)
|
306
|
+
clauses = positions[0].map { |word|
|
307
|
+
BooleanClause.new(_get_term_query(word), BooleanClause::Occur::SHOULD)
|
308
|
+
}
|
309
|
+
return get_boolean_query(clauses)
|
310
|
+
else
|
311
|
+
return _get_term_query(positions[0])
|
312
|
+
end
|
305
313
|
end
|
306
314
|
|
307
315
|
multi_phrase = false
|
@@ -402,7 +410,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id6e7f6ac20
|
|
402
410
|
return qp.parse(query)
|
403
411
|
end
|
404
412
|
|
405
|
-
..end lib/ferret/query_parser/query_parser.y modeval..
|
413
|
+
..end lib/ferret/query_parser/query_parser.y modeval..id0396ae54ac
|
406
414
|
|
407
415
|
##### racc 1.4.4 generates ###
|
408
416
|
|
@@ -416,14 +424,14 @@ racc_reduce_table = [
|
|
416
424
|
2, 28, :_reduce_6,
|
417
425
|
2, 28, :_reduce_7,
|
418
426
|
1, 28, :_reduce_8,
|
419
|
-
1, 30, :_reduce_none,
|
420
|
-
3, 30, :_reduce_10,
|
421
|
-
1, 29, :_reduce_none,
|
422
|
-
3, 29, :_reduce_12,
|
423
|
-
1, 29, :_reduce_none,
|
424
|
-
1, 29, :_reduce_none,
|
425
|
-
1, 29, :_reduce_none,
|
426
427
|
1, 29, :_reduce_none,
|
428
|
+
3, 29, :_reduce_10,
|
429
|
+
1, 30, :_reduce_none,
|
430
|
+
3, 30, :_reduce_12,
|
431
|
+
1, 30, :_reduce_none,
|
432
|
+
1, 30, :_reduce_none,
|
433
|
+
1, 30, :_reduce_none,
|
434
|
+
1, 30, :_reduce_none,
|
427
435
|
1, 31, :_reduce_17,
|
428
436
|
3, 31, :_reduce_18,
|
429
437
|
2, 31, :_reduce_19,
|
@@ -461,15 +469,15 @@ racc_reduce_n = 48
|
|
461
469
|
racc_shift_n = 78
|
462
470
|
|
463
471
|
racc_action_table = [
|
464
|
-
8, 10,
|
465
|
-
|
466
|
-
3,
|
467
|
-
9,
|
468
|
-
|
472
|
+
8, 10, 67, 66, 75, 74, 50, 21, 2, 40,
|
473
|
+
25, 7, 9, 38, 13, 15, 17, 19, 8, 10,
|
474
|
+
3, 53, 46, 39, 26, 21, 2, 37, -26, 7,
|
475
|
+
9, 45, 13, 15, 17, 19, 8, 10, 3, 43,
|
476
|
+
64, 49, -26, 21, 2, 60, 59, 7, 9, 63,
|
469
477
|
13, 15, 17, 19, 58, 57, 3, 8, 10, 31,
|
470
478
|
33, 54, 55, 56, 21, 2, 44, 48, 7, 9,
|
471
|
-
61, 13, 15, 17, 19,
|
472
|
-
31, 33,
|
479
|
+
61, 13, 15, 17, 19, 36, 62, 3, 8, 10,
|
480
|
+
31, 33, 34, 42, 65, 21, 2, 41, 30, 7,
|
473
481
|
9, 70, 13, 15, 17, 19, 8, 10, 3, 71,
|
474
482
|
72, 73, 24, 21, 2, 77, nil, 7, 9, nil,
|
475
483
|
13, 15, 17, 19, 21, 2, 3, nil, 7, 9,
|
@@ -480,61 +488,57 @@ racc_action_table = [
|
|
480
488
|
3 ]
|
481
489
|
|
482
490
|
racc_action_check = [
|
483
|
-
0, 0,
|
484
|
-
|
485
|
-
0,
|
486
|
-
2,
|
487
|
-
|
491
|
+
0, 0, 46, 46, 64, 64, 30, 0, 0, 17,
|
492
|
+
6, 0, 0, 15, 0, 0, 0, 0, 2, 2,
|
493
|
+
0, 34, 24, 17, 6, 2, 2, 15, 21, 2,
|
494
|
+
2, 24, 2, 2, 2, 2, 33, 33, 2, 21,
|
495
|
+
42, 28, 21, 33, 33, 38, 38, 33, 33, 42,
|
488
496
|
33, 33, 33, 33, 37, 35, 33, 23, 23, 23,
|
489
497
|
23, 35, 35, 35, 23, 23, 23, 26, 23, 23,
|
490
|
-
39, 23, 23, 23, 23,
|
491
|
-
12, 12,
|
498
|
+
39, 23, 23, 23, 23, 13, 41, 23, 12, 12,
|
499
|
+
12, 12, 13, 19, 43, 12, 12, 18, 11, 12,
|
492
500
|
12, 53, 12, 12, 12, 12, 31, 31, 12, 54,
|
493
501
|
55, 56, 3, 31, 31, 72, nil, 31, 31, nil,
|
494
|
-
31, 31, 31, 31,
|
495
|
-
nil,
|
496
|
-
25, nil, 25, 25, 25, 25,
|
497
|
-
|
498
|
-
nil,
|
499
|
-
|
502
|
+
31, 31, 31, 31, 8, 8, 31, nil, 8, 8,
|
503
|
+
nil, 8, 8, 8, 8, 25, 25, 8, nil, 25,
|
504
|
+
25, nil, 25, 25, 25, 25, 10, 10, 25, nil,
|
505
|
+
10, 10, nil, 10, 10, 10, 10, 49, 49, 10,
|
506
|
+
nil, 49, 49, nil, 49, 49, 49, 49, nil, nil,
|
507
|
+
49 ]
|
500
508
|
|
501
509
|
racc_action_pointer = [
|
502
|
-
-3, nil, 15, 92, nil, nil,
|
503
|
-
|
504
|
-
nil,
|
505
|
-
6, 93, nil, 33,
|
506
|
-
|
510
|
+
-3, nil, 15, 92, nil, nil, 8, nil, 104, nil,
|
511
|
+
126, 88, 75, 65, nil, 3, nil, -1, 78, 73,
|
512
|
+
nil, 26, nil, 54, 12, 115, 57, nil, 39, nil,
|
513
|
+
6, 93, nil, 33, 8, 45, nil, 44, 24, 60,
|
514
|
+
nil, 66, 30, 74, nil, nil, -19, nil, nil, 137,
|
507
515
|
nil, nil, nil, 81, 89, 87, 82, nil, nil, nil,
|
508
516
|
nil, nil, nil, nil, -17, nil, nil, nil, nil, nil,
|
509
517
|
nil, nil, 95, nil, nil, nil, nil, nil ]
|
510
518
|
|
511
519
|
racc_action_default = [
|
512
520
|
-48, -14, -48, -48, -15, -16, -48, -20, -48, -23,
|
513
|
-
-48, -48, -1, -48, -2, -48, -
|
521
|
+
-48, -48, -1, -48, -2, -48, -8, -48, -9, -48,
|
514
522
|
-11, -17, -13, -48, -48, -48, -48, -6, -48, -7,
|
515
523
|
-48, -48, -5, -48, -30, -48, -32, -48, -44, -48,
|
516
|
-
-
|
524
|
+
-47, -48, -48, -19, -12, -43, -48, -21, -27, -48,
|
517
525
|
78, -3, -4, -48, -48, -28, -48, -33, -45, -40,
|
518
|
-
-41, -
|
526
|
+
-41, -46, -10, -42, -48, -18, -39, -38, -22, -24,
|
519
527
|
-31, -35, -48, -34, -37, -36, -25, -29 ]
|
520
528
|
|
521
529
|
racc_goto_table = [
|
522
|
-
|
523
|
-
|
524
|
-
51, nil, 52, nil,
|
525
|
-
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
|
526
|
-
nil, 69 ]
|
530
|
+
47, 32, 12, 27, 23, 29, 11, 68, 28, 76,
|
531
|
+
35, nil, 32, nil, nil, nil, nil, nil, nil, nil,
|
532
|
+
51, nil, 52, nil, 69 ]
|
527
533
|
|
528
534
|
racc_goto_check = [
|
529
|
-
|
530
|
-
|
531
|
-
3, nil, 3, nil,
|
532
|
-
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
|
533
|
-
nil, 4 ]
|
535
|
+
5, 3, 2, 4, 2, 4, 1, 12, 13, 14,
|
536
|
+
15, nil, 3, nil, nil, nil, nil, nil, nil, nil,
|
537
|
+
3, nil, 3, nil, 5 ]
|
534
538
|
|
535
539
|
racc_goto_pointer = [
|
536
|
-
nil, 6,
|
537
|
-
nil, nil, -
|
540
|
+
nil, 6, 2, -11, -5, -25, nil, nil, nil, nil,
|
541
|
+
nil, nil, -40, -1, -60, -3 ]
|
538
542
|
|
539
543
|
racc_goto_default = [
|
540
544
|
nil, nil, nil, 14, 16, 18, 20, 22, 1, 4,
|
@@ -617,8 +621,8 @@ Racc_token_to_s_table = [
|
|
617
621
|
'top_query',
|
618
622
|
'bool_query',
|
619
623
|
'bool_clause',
|
620
|
-
'query',
|
621
624
|
'boosted_query',
|
625
|
+
'query',
|
622
626
|
'term_query',
|
623
627
|
'field_query',
|
624
628
|
'phrase_query',
|
data/lib/ferret/search.rb
CHANGED
@@ -44,6 +44,6 @@ require 'ferret/search/range_filter.rb'
|
|
44
44
|
require 'ferret/search/query_filter.rb'
|
45
45
|
require 'ferret/search/caching_wrapper_filter.rb'
|
46
46
|
require 'ferret/search/filtered_query.rb'
|
47
|
-
require 'ferret/search/
|
47
|
+
require 'ferret/search/match_all_query.rb'
|
48
48
|
require 'ferret/search/spans.rb'
|
49
49
|
require 'ferret/search/index_searcher.rb'
|
@@ -82,7 +82,7 @@ module Ferret::Search
|
|
82
82
|
#
|
83
83
|
# raises:: TooManyClauses if the new number of clauses exceeds the
|
84
84
|
# maximum clause number #max_clause_count()
|
85
|
-
def add_query(query, occur)
|
85
|
+
def add_query(query, occur=BooleanClause::Occur::SHOULD)
|
86
86
|
add_clause(BooleanClause.new(query, occur))
|
87
87
|
end
|
88
88
|
|
@@ -95,6 +95,7 @@ module Ferret::Search
|
|
95
95
|
end
|
96
96
|
|
97
97
|
@clauses << clause
|
98
|
+
self
|
98
99
|
end
|
99
100
|
alias :<< :add_clause
|
100
101
|
|
@@ -12,10 +12,10 @@ module Ferret::Search
|
|
12
12
|
|
13
13
|
# Creates a hit queue sorted by the given list of fields.
|
14
14
|
#
|
15
|
-
# reader::
|
15
|
+
# reader:: Index to use.
|
16
16
|
# fields:: Field names, in priority order (highest priority first).
|
17
|
-
# Cannot be +nil+ or empty.
|
18
|
-
#
|
17
|
+
# Cannot be +nil+ or empty.
|
18
|
+
# size:: The number of hits to retain. Must be greater than zero.
|
19
19
|
# raises:: IOError
|
20
20
|
def initialize(reader, fields, size)
|
21
21
|
super(size)
|
@@ -113,7 +113,8 @@ module Ferret::Search
|
|
113
113
|
def to_s(field = nil)
|
114
114
|
buffer = ""
|
115
115
|
buffer << "#{@term.field}:" if @term.field != field
|
116
|
-
buffer << "#{@term.text}
|
116
|
+
buffer << "#{@term.text}~"
|
117
|
+
buffer << minimum_similarity.to_s if minimum_similarity != 0.5
|
117
118
|
buffer << "^#{boost()}" if (boost() != 1.0)
|
118
119
|
return buffer
|
119
120
|
end
|
@@ -92,6 +92,9 @@ module Ferret::Search
|
|
92
92
|
num_docs = options[:num_docs]||10
|
93
93
|
max_size = first_doc + num_docs
|
94
94
|
sort = options[:sort]
|
95
|
+
if sort and not sort.kind_of?(Sort)
|
96
|
+
sort = Sort.new(sort)
|
97
|
+
end
|
95
98
|
|
96
99
|
if (num_docs <= 0)
|
97
100
|
raise ArgumentError, "num_docs must be > 0 to run a search"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Ferret::Search
|
2
2
|
# A query that matches all documents.
|
3
|
-
class
|
3
|
+
class MatchAllQuery < Query
|
4
4
|
|
5
5
|
def initialize()
|
6
6
|
super
|
@@ -20,7 +20,7 @@ module Ferret::Search
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def explain(doc)
|
23
|
-
return Explanation.new(1.0, "
|
23
|
+
return Explanation.new(1.0, "MatchAllQuery")
|
24
24
|
end
|
25
25
|
|
26
26
|
def next?
|
@@ -43,7 +43,7 @@ module Ferret::Search
|
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
|
-
class
|
46
|
+
class MatchAllWeight < Weight
|
47
47
|
attr_reader :query
|
48
48
|
def initialize(query, searcher)
|
49
49
|
@query = query
|
@@ -71,7 +71,7 @@ module Ferret::Search
|
|
71
71
|
|
72
72
|
def explain(reader, doc)
|
73
73
|
# explain query weight
|
74
|
-
query_expl = Explanation.new(1.0, "
|
74
|
+
query_expl = Explanation.new(1.0, "MatchAllQuery")
|
75
75
|
boost_expl = Explanation.new(@query.boost, "boost")
|
76
76
|
if (boost_expl.value != 1.0)
|
77
77
|
query_expl << boost_expl
|
@@ -83,17 +83,17 @@ module Ferret::Search
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def create_weight(searcher)
|
86
|
-
return
|
86
|
+
return MatchAllWeight.new(self, searcher)
|
87
87
|
end
|
88
88
|
|
89
89
|
def to_s(field)
|
90
|
-
buffer = "
|
90
|
+
buffer = "MatchAllQuery"
|
91
91
|
buffer << "^#{boost}" if (boost() != 1.0)
|
92
92
|
return buffer
|
93
93
|
end
|
94
94
|
|
95
95
|
def eql?(o)
|
96
|
-
return (o.instance_of?(
|
96
|
+
return (o.instance_of?(MatchAllQuery) and boost == o.boost)
|
97
97
|
end
|
98
98
|
alias :== :eql?
|
99
99
|
|