ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
@@ -1,79 +0,0 @@
|
|
1
|
-
module Ferret::Search::Spans
|
2
|
-
# Matches spans near the beginning of a field.
|
3
|
-
class SpanFirstQuery < SpanQuery
|
4
|
-
# Construct a SpanFirstQuery matching spans in +match+ whose finish
|
5
|
-
# position is less than or equal to +finish+.
|
6
|
-
def initialize(match, finish)
|
7
|
-
super()
|
8
|
-
@match = match
|
9
|
-
@finish = finish
|
10
|
-
end
|
11
|
-
|
12
|
-
# Return the SpanQuery whose matches are filtered.
|
13
|
-
def match() @match end
|
14
|
-
|
15
|
-
# Return the maximum finish position permitted in a match.
|
16
|
-
def finish() @finish end
|
17
|
-
|
18
|
-
def field() @match.field() end
|
19
|
-
|
20
|
-
def terms() @match.terms() end
|
21
|
-
|
22
|
-
def to_s(field = nil)
|
23
|
-
return "span_first(#{@match.to_s(field)}, #{finish})"
|
24
|
-
end
|
25
|
-
|
26
|
-
def spans(reader)
|
27
|
-
SpanFirstEnum.new(self, reader)
|
28
|
-
end
|
29
|
-
|
30
|
-
class SpanFirstEnum < SpansEnum
|
31
|
-
def initialize(query, reader)
|
32
|
-
super()
|
33
|
-
@query = query
|
34
|
-
@spans = @query.match.spans(reader)
|
35
|
-
end
|
36
|
-
|
37
|
-
def next?()
|
38
|
-
while (@spans.next?()) # scan to next match
|
39
|
-
return true if (finish() <= @query.finish)
|
40
|
-
end
|
41
|
-
return false
|
42
|
-
end
|
43
|
-
|
44
|
-
def skip_to(target)
|
45
|
-
if not @spans.skip_to(target)
|
46
|
-
return false
|
47
|
-
end
|
48
|
-
|
49
|
-
if (@spans.finish <= @query.finish) # there is a match
|
50
|
-
return true
|
51
|
-
end
|
52
|
-
|
53
|
-
return next?() # scan to next match
|
54
|
-
end
|
55
|
-
|
56
|
-
def doc() @spans.doc() end
|
57
|
-
def start() @spans.start() end
|
58
|
-
def finish() @spans.finish() end
|
59
|
-
|
60
|
-
def to_s() "spans(#{@query})" end
|
61
|
-
end
|
62
|
-
|
63
|
-
|
64
|
-
def rewrite(reader)
|
65
|
-
clone = nil
|
66
|
-
rewritten = @match.rewrite(reader)
|
67
|
-
if (rewritten != @match)
|
68
|
-
clone = self.clone()
|
69
|
-
clone.match = rewritten
|
70
|
-
end
|
71
|
-
|
72
|
-
if (clone != nil)
|
73
|
-
return clone # some clauses rewrote
|
74
|
-
else
|
75
|
-
return self # no clauses rewrote
|
76
|
-
end
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
@@ -1,108 +0,0 @@
|
|
1
|
-
module Ferret::Search::Spans
|
2
|
-
# Matches spans which are near one another. One can specify _slop_, the
|
3
|
-
# maximum number of intervening unmatched positions, as well as whether
|
4
|
-
# matches are required to be in-order.
|
5
|
-
class SpanNearQuery < SpanQuery
|
6
|
-
|
7
|
-
# Construct a SpanNearQuery. Matches spans matching a span from each
|
8
|
-
# clause, with up to +slop+ total unmatched positions between them. When
|
9
|
-
# +in_order+ is true, the spans from each clause must be ordered as in
|
10
|
-
# +clauses+.
|
11
|
-
def initialize(clauses, slop, in_order)
|
12
|
-
super()
|
13
|
-
# copy clauses array into an ArrayList
|
14
|
-
@clauses = Array.new(clauses.length)
|
15
|
-
@field = nil
|
16
|
-
clauses.each_index do |i|
|
17
|
-
clause = clauses[i]
|
18
|
-
if i == 0 # check field
|
19
|
-
@field = clause.field()
|
20
|
-
elsif clause.field() != @field
|
21
|
-
raise ArgumentError, "Clauses must have same field."
|
22
|
-
end
|
23
|
-
@clauses[i] = clause
|
24
|
-
end
|
25
|
-
|
26
|
-
@slop = slop
|
27
|
-
@in_order = in_order
|
28
|
-
end
|
29
|
-
|
30
|
-
# Return the clauses whose spans are matched.
|
31
|
-
def clauses() @clauses end
|
32
|
-
|
33
|
-
# Return the maximum number of intervening unmatched positions permitted.
|
34
|
-
def slop() @slop end
|
35
|
-
|
36
|
-
# Return true if matches are required to be in-order.
|
37
|
-
def in_order?() @in_order end
|
38
|
-
|
39
|
-
attr_reader :field
|
40
|
-
|
41
|
-
def terms()
|
42
|
-
terms = []
|
43
|
-
@clauses.each do |clause|
|
44
|
-
terms += clause.terms
|
45
|
-
end
|
46
|
-
return terms
|
47
|
-
end
|
48
|
-
|
49
|
-
def to_s(field = nil)
|
50
|
-
buffer = "span_near(["
|
51
|
-
buffer << @clauses.map {|c| c.to_s(field)}.join(", ")
|
52
|
-
buffer << "], #{@slop}, #{@in_order})"
|
53
|
-
return buffer
|
54
|
-
end
|
55
|
-
|
56
|
-
def spans(reader)
|
57
|
-
if (@clauses.size() == 0) # optimize 0-clause case
|
58
|
-
return SpanOrQuery.new(@clauses).spans(reader)
|
59
|
-
end
|
60
|
-
|
61
|
-
if (@clauses.size() == 1) # optimize 1-clause case
|
62
|
-
return @clauses[0].spans(reader)
|
63
|
-
end
|
64
|
-
|
65
|
-
return NearSpansEnum.new(self, reader)
|
66
|
-
end
|
67
|
-
|
68
|
-
def rewrite(reader)
|
69
|
-
clone = nil
|
70
|
-
@clauses.each_index do |i|
|
71
|
-
clause = @clauses[i]
|
72
|
-
query = clause.rewrite(reader)
|
73
|
-
if (query != clause) # clause rewrote: must clone
|
74
|
-
if (clone == nil)
|
75
|
-
clone = self.clone()
|
76
|
-
end
|
77
|
-
clone.clauses[i] = query
|
78
|
-
end
|
79
|
-
end
|
80
|
-
if (clone != nil)
|
81
|
-
return clone # some clauses rewrote
|
82
|
-
else
|
83
|
-
return self # no clauses rewrote
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
# Returns true iff +o+ is equal to this.
|
88
|
-
def eql?(o)
|
89
|
-
return false if (o == nil or self.class() != o.class())
|
90
|
-
|
91
|
-
return false if (@in_order != o.in_order?)
|
92
|
-
return false if (@slop != o.slop)
|
93
|
-
return false if (@clauses != o.clauses)
|
94
|
-
return false if (@field != o.field)
|
95
|
-
|
96
|
-
return true
|
97
|
-
end
|
98
|
-
alias :== :eql?
|
99
|
-
|
100
|
-
def hash()
|
101
|
-
result = @clauses.hash()
|
102
|
-
result += @slop * 29
|
103
|
-
result += (@in_order ? 1 : 0)
|
104
|
-
result ^= @field.hash()
|
105
|
-
return result
|
106
|
-
end
|
107
|
-
end
|
108
|
-
end
|
@@ -1,130 +0,0 @@
|
|
1
|
-
module Ferret::Search::Spans
|
2
|
-
# Removes matches which overlap with another SpanQuery.
|
3
|
-
class SpanNotQuery < SpanQuery
|
4
|
-
# Construct a SpanNotQuery matching spans from +incl+ which
|
5
|
-
# have no overlap with spans from +excl+.
|
6
|
-
def initialize(incl, excl)
|
7
|
-
super()
|
8
|
-
@incl = incl
|
9
|
-
@excl = excl
|
10
|
-
|
11
|
-
if incl.field != excl.field
|
12
|
-
raise ArgumentError, "Clauses must have same field."
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
# Return the SpanQuery whose matches are filtered.
|
17
|
-
def incl() @incl end
|
18
|
-
|
19
|
-
# Return the SpanQuery whose matches must not overlap those returned.
|
20
|
-
def excl() @excl end
|
21
|
-
|
22
|
-
def field() @incl.field() end
|
23
|
-
|
24
|
-
def terms() @incl.terms() end
|
25
|
-
|
26
|
-
def to_s(field = nil)
|
27
|
-
return "span_not(#{incl.to_s(field)}, #{excl.to_s(field)})"
|
28
|
-
end
|
29
|
-
|
30
|
-
def spans(reader)
|
31
|
-
return SpanNotEnum.new(self, reader)
|
32
|
-
end
|
33
|
-
|
34
|
-
class SpanNotEnum < SpansEnum
|
35
|
-
def initialize(query, reader)
|
36
|
-
@query = query
|
37
|
-
@incl_spans = @query.incl.spans(reader)
|
38
|
-
@more_incl = true
|
39
|
-
@excl_spans = @query.excl.spans(reader)
|
40
|
-
@more_excl = @excl_spans.next? # excl_spans needs to be preset
|
41
|
-
end
|
42
|
-
|
43
|
-
def next?()
|
44
|
-
if (@more_incl) # move to next incl
|
45
|
-
@more_incl = @incl_spans.next?()
|
46
|
-
end
|
47
|
-
|
48
|
-
while (@more_incl and @more_excl)
|
49
|
-
if (@incl_spans.doc > @excl_spans.doc) # skip excl
|
50
|
-
@more_excl = @excl_spans.skip_to(@incl_spans.doc)
|
51
|
-
end
|
52
|
-
|
53
|
-
while (@more_excl and # while excl is before
|
54
|
-
@incl_spans.doc == @excl_spans.doc and
|
55
|
-
@excl_spans.finish <= @incl_spans.start)
|
56
|
-
@more_excl = @excl_spans.next? # increment excl
|
57
|
-
end
|
58
|
-
|
59
|
-
if (not @more_excl or # if no intersection
|
60
|
-
@incl_spans.doc != @excl_spans.doc or
|
61
|
-
@incl_spans.finish <= @excl_spans.start)
|
62
|
-
break # we found a match
|
63
|
-
end
|
64
|
-
|
65
|
-
@more_incl = @incl_spans.next? # intersected: keep scanning
|
66
|
-
end
|
67
|
-
return @more_incl
|
68
|
-
end
|
69
|
-
|
70
|
-
def skip_to(target)
|
71
|
-
if @more_incl # skip incl
|
72
|
-
@more_incl = @incl_spans.skip_to(target)
|
73
|
-
end
|
74
|
-
|
75
|
-
if not @more_incl
|
76
|
-
return false
|
77
|
-
end
|
78
|
-
|
79
|
-
if (@more_excl and @incl_spans.doc > @excl_spans.doc) # skip excl
|
80
|
-
@more_excl = @excl_spans.skip_to(@incl_spans.doc)
|
81
|
-
end
|
82
|
-
|
83
|
-
while (@more_excl and # while excl is before
|
84
|
-
@incl_spans.doc == @excl_spans.doc and
|
85
|
-
@excl_spans.finish <= @incl_spans.start)
|
86
|
-
@more_excl = @excl_spans.next? # increment excl
|
87
|
-
end
|
88
|
-
|
89
|
-
if (not @more_excl or # if no intersection
|
90
|
-
@incl_spans.doc != @excl_spans.doc or
|
91
|
-
@incl_spans.finish <= @excl_spans.start)
|
92
|
-
return true # we found a match
|
93
|
-
end
|
94
|
-
|
95
|
-
return next?() # scan to next match
|
96
|
-
end
|
97
|
-
|
98
|
-
def doc() @incl_spans.doc end
|
99
|
-
def start() @incl_spans.start end
|
100
|
-
def finish() @incl_spans.finish end
|
101
|
-
|
102
|
-
def to_s()
|
103
|
-
return "spans(#{@query})"
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
def rewrite(reader)
|
108
|
-
clone = nil
|
109
|
-
|
110
|
-
rewritten_incl = @incl.rewrite(reader)
|
111
|
-
if (rewritten_incl != @incl)
|
112
|
-
clone = self.clone()
|
113
|
-
clone.incl = rewritten_incl
|
114
|
-
end
|
115
|
-
|
116
|
-
rewritten_excl = @excl.rewrite(reader)
|
117
|
-
if (rewritten_excl != @excl)
|
118
|
-
clone = self.clone() if (clone == nil)
|
119
|
-
clone.excl = rewritten_excl
|
120
|
-
end
|
121
|
-
|
122
|
-
if (clone != nil)
|
123
|
-
return clone # some clauses rewrote
|
124
|
-
else
|
125
|
-
return self # no clauses rewrote
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
end
|
130
|
-
end
|
@@ -1,176 +0,0 @@
|
|
1
|
-
module Ferret::Search::Spans
|
2
|
-
# Matches the union of its clauses.
|
3
|
-
class SpanOrQuery < SpanQuery
|
4
|
-
|
5
|
-
# Construct a SpanOrQuery merging the provided clauses.
|
6
|
-
def initialize(clauses)
|
7
|
-
super()
|
8
|
-
|
9
|
-
# copy clauses array into an ArrayList
|
10
|
-
@clauses = Array.new(clauses.length)
|
11
|
-
@field = nil
|
12
|
-
clauses.each_index do |i|
|
13
|
-
clause = clauses[i]
|
14
|
-
if i == 0 # check field
|
15
|
-
@field = clause.field()
|
16
|
-
elsif clause.field() != @field
|
17
|
-
raise ArgumentError, "Clauses must have same field."
|
18
|
-
end
|
19
|
-
@clauses[i] = clause
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
# Return the clauses whose spans are matched.
|
24
|
-
def clauses() @clauses end
|
25
|
-
|
26
|
-
attr_reader :field
|
27
|
-
|
28
|
-
def terms()
|
29
|
-
terms = []
|
30
|
-
@clauses.each do |clause|
|
31
|
-
terms += clause.terms
|
32
|
-
end
|
33
|
-
return terms
|
34
|
-
end
|
35
|
-
|
36
|
-
def rewrite(reader)
|
37
|
-
clone = nil
|
38
|
-
@clauses.each_index do |i|
|
39
|
-
clause = @clauses[i]
|
40
|
-
query = clause.rewrite(reader)
|
41
|
-
if (query != clause) # clause rewrote: must clone
|
42
|
-
if (clone == nil)
|
43
|
-
clone = self.clone()
|
44
|
-
end
|
45
|
-
clone.clauses[i] = query
|
46
|
-
end
|
47
|
-
end
|
48
|
-
if (clone != nil)
|
49
|
-
return clone # some clauses rewrote
|
50
|
-
else
|
51
|
-
return self # no clauses rewrote
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
def to_s(field = nil)
|
56
|
-
buffer = "spanOr(["
|
57
|
-
buffer << @clauses.map {|c| c.to_s(field()) }.join(", ")
|
58
|
-
buffer << "])"
|
59
|
-
return buffer
|
60
|
-
end
|
61
|
-
|
62
|
-
def eql?(o)
|
63
|
-
return false if (o == nil or self.class() != o.class())
|
64
|
-
|
65
|
-
return false if (@clauses != o.clauses)
|
66
|
-
return false if (@field != o.field)
|
67
|
-
|
68
|
-
return true
|
69
|
-
end
|
70
|
-
alias :== :eql?
|
71
|
-
|
72
|
-
def hash()
|
73
|
-
return @clauses.hash ^ @field.hash
|
74
|
-
end
|
75
|
-
|
76
|
-
class SpanQueue < Ferret::Utils::PriorityQueue
|
77
|
-
def less_than(o1, o2)
|
78
|
-
if (o1.doc == o2.doc)
|
79
|
-
if (o1.start == o2.start)
|
80
|
-
return o1.finish < o2.finish
|
81
|
-
else
|
82
|
-
return o1.start < o2.start
|
83
|
-
end
|
84
|
-
else
|
85
|
-
return o1.doc < o2.doc
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
def spans(reader)
|
91
|
-
if (@clauses.size == 1) # optimize 1-clause case
|
92
|
-
return @clauses[0].spans(reader)
|
93
|
-
end
|
94
|
-
|
95
|
-
return SpanOrEnum.new(self, reader)
|
96
|
-
end
|
97
|
-
|
98
|
-
class SpanOrEnum < SpansEnum
|
99
|
-
def initialize(query, reader)
|
100
|
-
@query = query
|
101
|
-
@queue = SpanQueue.new(query.clauses.size)
|
102
|
-
@all = query.clauses.map {|c| c.spans(reader)}
|
103
|
-
@first_time = true
|
104
|
-
end
|
105
|
-
|
106
|
-
def next?
|
107
|
-
if (@first_time) # first time -- initialize
|
108
|
-
@all.delete_if do |spans|
|
109
|
-
if (spans.next?) # move to first entry
|
110
|
-
@queue.push(spans) # build queue
|
111
|
-
next false
|
112
|
-
else
|
113
|
-
next true
|
114
|
-
end
|
115
|
-
end
|
116
|
-
@first_time = false
|
117
|
-
return @queue.size() != 0
|
118
|
-
end
|
119
|
-
|
120
|
-
if @queue.size == 0 # all done
|
121
|
-
return false
|
122
|
-
end
|
123
|
-
|
124
|
-
if top().next? # move to next
|
125
|
-
@queue.adjust_top()
|
126
|
-
return true
|
127
|
-
end
|
128
|
-
|
129
|
-
@all.delete(@queue.pop()) # exhausted a clause
|
130
|
-
|
131
|
-
return @queue.size() != 0
|
132
|
-
end
|
133
|
-
|
134
|
-
def top() return @queue.top() end
|
135
|
-
|
136
|
-
def skip_to(target)
|
137
|
-
if (@first_time)
|
138
|
-
@all.delete_if do |spans|
|
139
|
-
if (spans.skip_to(target)) # skip each spans in all
|
140
|
-
@queue.push(spans) # build queue
|
141
|
-
next false
|
142
|
-
else
|
143
|
-
next true
|
144
|
-
end
|
145
|
-
end
|
146
|
-
@first_time = false
|
147
|
-
else
|
148
|
-
while (@queue.size != 0 and top().doc < target)
|
149
|
-
if (top().skip_to(target))
|
150
|
-
@queue.adjust_top()
|
151
|
-
else
|
152
|
-
@all.delete(@queue.pop())
|
153
|
-
end
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
return @queue.size() != 0
|
158
|
-
end
|
159
|
-
|
160
|
-
def doc() top().doc() end
|
161
|
-
def start() top().start() end
|
162
|
-
def finish() top().finish() end
|
163
|
-
|
164
|
-
def to_s()
|
165
|
-
buffer = "spans(#{@query})@"
|
166
|
-
if @first_time
|
167
|
-
buffer << "START"
|
168
|
-
else
|
169
|
-
buffer << (@queue.size>0 ? ("#{doc}:#{start()}-#{finish}") : "END")
|
170
|
-
end
|
171
|
-
return buffer
|
172
|
-
end
|
173
|
-
end
|
174
|
-
|
175
|
-
end
|
176
|
-
end
|