ferret 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +20 -0
- data/README +109 -0
- data/Rakefile +275 -0
- data/TODO +9 -0
- data/TUTORIAL +197 -0
- data/ext/extconf.rb +3 -0
- data/ext/ferret.c +23 -0
- data/ext/ferret.h +85 -0
- data/ext/index_io.c +543 -0
- data/ext/priority_queue.c +227 -0
- data/ext/ram_directory.c +316 -0
- data/ext/segment_merge_queue.c +41 -0
- data/ext/string_helper.c +42 -0
- data/ext/tags +240 -0
- data/ext/term.c +261 -0
- data/ext/term_buffer.c +299 -0
- data/ext/util.c +12 -0
- data/lib/ferret.rb +41 -0
- data/lib/ferret/analysis.rb +11 -0
- data/lib/ferret/analysis/analyzers.rb +93 -0
- data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
- data/lib/ferret/analysis/token.rb +79 -0
- data/lib/ferret/analysis/token_filters.rb +86 -0
- data/lib/ferret/analysis/token_stream.rb +26 -0
- data/lib/ferret/analysis/tokenizers.rb +107 -0
- data/lib/ferret/analysis/word_list_loader.rb +27 -0
- data/lib/ferret/document.rb +2 -0
- data/lib/ferret/document/document.rb +152 -0
- data/lib/ferret/document/field.rb +304 -0
- data/lib/ferret/index.rb +26 -0
- data/lib/ferret/index/compound_file_io.rb +343 -0
- data/lib/ferret/index/document_writer.rb +288 -0
- data/lib/ferret/index/field_infos.rb +259 -0
- data/lib/ferret/index/fields_io.rb +175 -0
- data/lib/ferret/index/index.rb +228 -0
- data/lib/ferret/index/index_file_names.rb +33 -0
- data/lib/ferret/index/index_reader.rb +462 -0
- data/lib/ferret/index/index_writer.rb +488 -0
- data/lib/ferret/index/multi_reader.rb +363 -0
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
- data/lib/ferret/index/segment_infos.rb +130 -0
- data/lib/ferret/index/segment_merge_info.rb +47 -0
- data/lib/ferret/index/segment_merge_queue.rb +16 -0
- data/lib/ferret/index/segment_merger.rb +337 -0
- data/lib/ferret/index/segment_reader.rb +380 -0
- data/lib/ferret/index/segment_term_enum.rb +178 -0
- data/lib/ferret/index/segment_term_vector.rb +58 -0
- data/lib/ferret/index/term.rb +49 -0
- data/lib/ferret/index/term_buffer.rb +88 -0
- data/lib/ferret/index/term_doc_enum.rb +283 -0
- data/lib/ferret/index/term_enum.rb +52 -0
- data/lib/ferret/index/term_info.rb +41 -0
- data/lib/ferret/index/term_infos_io.rb +312 -0
- data/lib/ferret/index/term_vector_offset_info.rb +20 -0
- data/lib/ferret/index/term_vectors_io.rb +552 -0
- data/lib/ferret/query_parser.rb +274 -0
- data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
- data/lib/ferret/search.rb +49 -0
- data/lib/ferret/search/boolean_clause.rb +100 -0
- data/lib/ferret/search/boolean_query.rb +303 -0
- data/lib/ferret/search/boolean_scorer.rb +294 -0
- data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
- data/lib/ferret/search/conjunction_scorer.rb +99 -0
- data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
- data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
- data/lib/ferret/search/explanation.rb +41 -0
- data/lib/ferret/search/field_cache.rb +216 -0
- data/lib/ferret/search/field_doc.rb +31 -0
- data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
- data/lib/ferret/search/filter.rb +11 -0
- data/lib/ferret/search/filtered_query.rb +130 -0
- data/lib/ferret/search/filtered_term_enum.rb +79 -0
- data/lib/ferret/search/fuzzy_query.rb +153 -0
- data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
- data/lib/ferret/search/hit_collector.rb +34 -0
- data/lib/ferret/search/hit_queue.rb +11 -0
- data/lib/ferret/search/index_searcher.rb +173 -0
- data/lib/ferret/search/match_all_docs_query.rb +104 -0
- data/lib/ferret/search/multi_phrase_query.rb +204 -0
- data/lib/ferret/search/multi_term_query.rb +65 -0
- data/lib/ferret/search/non_matching_scorer.rb +22 -0
- data/lib/ferret/search/phrase_positions.rb +55 -0
- data/lib/ferret/search/phrase_query.rb +217 -0
- data/lib/ferret/search/phrase_scorer.rb +153 -0
- data/lib/ferret/search/prefix_query.rb +47 -0
- data/lib/ferret/search/query.rb +111 -0
- data/lib/ferret/search/query_filter.rb +51 -0
- data/lib/ferret/search/range_filter.rb +103 -0
- data/lib/ferret/search/range_query.rb +139 -0
- data/lib/ferret/search/req_excl_scorer.rb +125 -0
- data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
- data/lib/ferret/search/score_doc.rb +38 -0
- data/lib/ferret/search/score_doc_comparator.rb +114 -0
- data/lib/ferret/search/scorer.rb +91 -0
- data/lib/ferret/search/similarity.rb +278 -0
- data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
- data/lib/ferret/search/sort.rb +105 -0
- data/lib/ferret/search/sort_comparator.rb +60 -0
- data/lib/ferret/search/sort_field.rb +87 -0
- data/lib/ferret/search/spans.rb +12 -0
- data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
- data/lib/ferret/search/spans/span_first_query.rb +79 -0
- data/lib/ferret/search/spans/span_near_query.rb +108 -0
- data/lib/ferret/search/spans/span_not_query.rb +130 -0
- data/lib/ferret/search/spans/span_or_query.rb +176 -0
- data/lib/ferret/search/spans/span_query.rb +25 -0
- data/lib/ferret/search/spans/span_scorer.rb +74 -0
- data/lib/ferret/search/spans/span_term_query.rb +105 -0
- data/lib/ferret/search/spans/span_weight.rb +84 -0
- data/lib/ferret/search/spans/spans_enum.rb +44 -0
- data/lib/ferret/search/term_query.rb +128 -0
- data/lib/ferret/search/term_scorer.rb +181 -0
- data/lib/ferret/search/top_docs.rb +24 -0
- data/lib/ferret/search/top_field_docs.rb +17 -0
- data/lib/ferret/search/weight.rb +54 -0
- data/lib/ferret/search/wildcard_query.rb +26 -0
- data/lib/ferret/search/wildcard_term_enum.rb +61 -0
- data/lib/ferret/stemmers.rb +1 -0
- data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
- data/lib/ferret/store.rb +5 -0
- data/lib/ferret/store/buffered_index_io.rb +191 -0
- data/lib/ferret/store/directory.rb +139 -0
- data/lib/ferret/store/fs_store.rb +338 -0
- data/lib/ferret/store/index_io.rb +259 -0
- data/lib/ferret/store/ram_store.rb +282 -0
- data/lib/ferret/utils.rb +7 -0
- data/lib/ferret/utils/bit_vector.rb +105 -0
- data/lib/ferret/utils/date_tools.rb +138 -0
- data/lib/ferret/utils/number_tools.rb +91 -0
- data/lib/ferret/utils/parameter.rb +41 -0
- data/lib/ferret/utils/priority_queue.rb +120 -0
- data/lib/ferret/utils/string_helper.rb +47 -0
- data/lib/ferret/utils/weak_key_hash.rb +51 -0
- data/rake_utils/code_statistics.rb +106 -0
- data/setup.rb +1551 -0
- data/test/benchmark/tb_ram_store.rb +76 -0
- data/test/benchmark/tb_rw_vint.rb +26 -0
- data/test/longrunning/tc_numbertools.rb +60 -0
- data/test/longrunning/tm_store.rb +19 -0
- data/test/test_all.rb +9 -0
- data/test/test_helper.rb +6 -0
- data/test/unit/analysis/tc_analyzer.rb +21 -0
- data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
- data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
- data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
- data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
- data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
- data/test/unit/analysis/tc_stop_filter.rb +14 -0
- data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
- data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_word_list_loader.rb +32 -0
- data/test/unit/document/tc_document.rb +47 -0
- data/test/unit/document/tc_field.rb +80 -0
- data/test/unit/index/tc_compound_file_io.rb +107 -0
- data/test/unit/index/tc_field_infos.rb +119 -0
- data/test/unit/index/tc_fields_io.rb +167 -0
- data/test/unit/index/tc_index.rb +140 -0
- data/test/unit/index/tc_index_reader.rb +622 -0
- data/test/unit/index/tc_index_writer.rb +57 -0
- data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
- data/test/unit/index/tc_segment_infos.rb +74 -0
- data/test/unit/index/tc_segment_term_docs.rb +17 -0
- data/test/unit/index/tc_segment_term_enum.rb +60 -0
- data/test/unit/index/tc_segment_term_vector.rb +71 -0
- data/test/unit/index/tc_term.rb +22 -0
- data/test/unit/index/tc_term_buffer.rb +57 -0
- data/test/unit/index/tc_term_info.rb +19 -0
- data/test/unit/index/tc_term_infos_io.rb +192 -0
- data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
- data/test/unit/index/tc_term_vectors_io.rb +108 -0
- data/test/unit/index/th_doc.rb +244 -0
- data/test/unit/query_parser/tc_query_parser.rb +84 -0
- data/test/unit/search/tc_filter.rb +113 -0
- data/test/unit/search/tc_fuzzy_query.rb +136 -0
- data/test/unit/search/tc_index_searcher.rb +188 -0
- data/test/unit/search/tc_search_and_sort.rb +98 -0
- data/test/unit/search/tc_similarity.rb +37 -0
- data/test/unit/search/tc_sort.rb +48 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +153 -0
- data/test/unit/store/tc_fs_store.rb +84 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +180 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/ts_analysis.rb +16 -0
- data/test/unit/ts_document.rb +4 -0
- data/test/unit/ts_index.rb +18 -0
- data/test/unit/ts_query_parser.rb +3 -0
- data/test/unit/ts_search.rb +10 -0
- data/test/unit/ts_store.rb +6 -0
- data/test/unit/ts_utils.rb +10 -0
- data/test/unit/utils/tc_bit_vector.rb +65 -0
- data/test/unit/utils/tc_date_tools.rb +50 -0
- data/test/unit/utils/tc_number_tools.rb +59 -0
- data/test/unit/utils/tc_parameter.rb +40 -0
- data/test/unit/utils/tc_priority_queue.rb +62 -0
- data/test/unit/utils/tc_string_helper.rb +21 -0
- data/test/unit/utils/tc_weak_key_hash.rb +25 -0
- metadata +251 -0
@@ -0,0 +1,79 @@
|
|
1
|
+
module Ferret::Search::Spans
  # Matches spans near the beginning of a field.
  #
  # Wraps another span query and only lets through those of its spans whose
  # finish position does not exceed a fixed limit.
  class SpanFirstQuery < SpanQuery
    # Construct a SpanFirstQuery matching spans in +match+ whose finish
    # position is less than or equal to +finish+.
    #
    # match::  the span query whose spans are filtered
    # finish:: the largest finish position a span may have and still match
    def initialize(match, finish)
      super()
      @match = match
      @finish = finish
    end

    # Return the SpanQuery whose matches are filtered.
    def match() @match end

    # Return the maximum finish position permitted in a match.
    def finish() @finish end

    # Return the field of the wrapped query.
    def field() @match.field() end

    # Return the terms of the wrapped query.
    def terms() @match.terms() end

    # Return a string of the form <tt>span_first(<match>, <finish>)</tt>.
    # +field+ is passed through to the wrapped query's +to_s+.
    def to_s(field = nil)
      return "span_first(#{@match.to_s(field)}, #{finish})"
    end

    # Return an enumeration of the spans of this query found through +reader+.
    def spans(reader)
      SpanFirstEnum.new(self, reader)
    end

    # Enumerates the spans of the wrapped query, skipping every span whose
    # finish position is greater than the query's +finish+ limit.
    class SpanFirstEnum < SpansEnum
      # query::  the SpanFirstQuery being enumerated
      # reader:: handed to the wrapped query's +spans+ method
      def initialize(query, reader)
        super()
        @query = query
        @spans = @query.match.spans(reader)
      end

      # Advance to the next span that finishes within the limit. Returns
      # false once the wrapped enumeration is exhausted.
      def next?()
        while @spans.next?() # scan to next match
          return true if @spans.finish() <= @query.finish
        end
        return false
      end

      # Skip the wrapped enumeration to +target+ and position this
      # enumeration on the first span from there on that finishes within
      # the limit. Returns false if there is no such span.
      def skip_to(target)
        return false unless @spans.skip_to(target)

        # the span we landed on may already be a match
        return true if @spans.finish <= @query.finish

        return next?() # otherwise scan to next match
      end

      def doc() @spans.doc() end
      def start() @spans.start() end
      def finish() @spans.finish() end

      def to_s() "spans(#{@query})" end
    end

    # Rewrite the wrapped query. Returns +self+ when the wrapped query
    # rewrites to something equal to itself; otherwise returns a clone of
    # this query wrapping the rewritten query.
    def rewrite(reader)
      rewritten = @match.rewrite(reader)
      return self if rewritten == @match # nothing rewrote

      copy = self.clone()
      copy.match = rewritten
      return copy
    end

    protected

    # Writer used by +rewrite+ to install the rewritten query in a clone.
    # The original code called +match=+ without defining it, which raised
    # NoMethodError whenever the wrapped query actually rewrote.
    def match=(match)
      @match = match
    end
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
module Ferret::Search::Spans
  # Matches spans which are near one another. One can specify _slop_, the
  # maximum number of intervening unmatched positions, as well as whether
  # matches are required to be in-order.
  class SpanNearQuery < SpanQuery

    # Construct a SpanNearQuery. Matches spans matching a span from each
    # clause, with up to +slop+ total unmatched positions between them. When
    # +in_order+ is true, the spans from each clause must be ordered as in
    # +clauses+.
    #
    # Raises ArgumentError unless every clause reports the same field as
    # the first clause.
    def initialize(clauses, slop, in_order)
      super()
      # copy the clauses into an array of our own, checking fields as we go
      @clauses = []
      @field = nil
      clauses.each_with_index do |clause, i|
        if i == 0 # the first clause decides the field
          @field = clause.field()
        elsif clause.field() != @field
          raise ArgumentError, "Clauses must have same field."
        end
        @clauses << clause
      end

      @slop = slop
      @in_order = in_order
    end

    # Give a clone (or dup) its own clauses array. Object#clone is shallow,
    # so without this +rewrite+ would write the rewritten clauses into the
    # array that the original query is still using.
    def initialize_copy(source)
      super
      @clauses = @clauses.dup
    end

    # Return the clauses whose spans are matched.
    def clauses() @clauses end

    # Return the maximum number of intervening unmatched positions permitted.
    def slop() @slop end

    # Return true if matches are required to be in-order.
    def in_order?() @in_order end

    # The field shared by all clauses (nil when there are no clauses).
    attr_reader :field

    # Return the terms of all clauses, concatenated in clause order.
    def terms()
      terms = []
      @clauses.each do |clause|
        terms += clause.terms
      end
      return terms
    end

    # Return a string of the form
    # <tt>span_near([<clause>, ...], <slop>, <in_order>)</tt>. +field+ is
    # passed through to each clause's +to_s+.
    def to_s(field = nil)
      buffer = "span_near(["
      buffer << @clauses.map {|c| c.to_s(field)}.join(", ")
      # the original interpolated @stop, which is never set, so the slop
      # was always printed as an empty string
      buffer << "], #{@slop}, #{@in_order})"
      return buffer
    end

    # Return an enumeration of the spans of this query found through +reader+.
    def spans(reader)
      if @clauses.size() == 0 # optimize 0-clause case
        return SpanOrQuery.new(@clauses).spans(reader)
      end

      if @clauses.size() == 1 # optimize 1-clause case
        return @clauses[0].spans(reader)
      end

      return NearSpansEnum.new(self, reader)
    end

    # Rewrite every clause. Returns +self+ when no clause changed; otherwise
    # returns a clone in which the changed clauses are replaced by their
    # rewritten form.
    def rewrite(reader)
      copy = nil
      @clauses.each_with_index do |clause, i|
        rewritten = clause.rewrite(reader)
        next if rewritten == clause
        copy ||= self.clone() # clause rewrote: must clone (once)
        copy.clauses[i] = rewritten
      end
      return copy || self
    end

    # Returns true iff +o+ is equal to this.
    def eql?(o)
      return false if o.nil? or self.class() != o.class()

      return false if @in_order != o.in_order?
      return false if @slop != o.slop
      return false if @clauses != o.clauses
      return false if @field != o.field

      return true
    end
    alias :== :eql?

    # Hash combining the clauses, slop, ordering flag and field, i.e. the
    # same attributes that +eql?+ compares.
    def hash()
      result = @clauses.hash()
      result += @slop * 29
      result += (@in_order ? 1 : 0)
      result ^= @field.hash()
      return result
    end
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
module Ferret::Search::Spans
  # Removes matches which overlap with another SpanQuery.
  class SpanNotQuery < SpanQuery
    # Construct a SpanNotQuery matching spans from +incl+ which
    # have no overlap with spans from +excl+.
    #
    # Raises ArgumentError if the two queries report different fields.
    def initialize(incl, excl)
      super()
      @incl = incl
      @excl = excl

      if incl.field != excl.field
        raise ArgumentError, "Clauses must have same field."
      end
    end

    # Return the SpanQuery whose matches are filtered.
    def incl() @incl end

    # Return the SpanQuery whose matches must not overlap those returned.
    def excl() @excl end

    # Return the field of the included query.
    def field() @incl.field() end

    # Return the terms of the included query.
    def terms() @incl.terms() end

    # Return a string of the form <tt>span_not(<incl>, <excl>)</tt>.
    # +field+ is passed through to both sub-queries' +to_s+.
    def to_s(field = nil)
      return "span_not(#{incl.to_s(field)}, #{excl.to_s(field)})"
    end

    # Return an enumeration of the spans of this query found through +reader+.
    def spans(reader)
      return SpanNotEnum.new(self, reader)
    end

    # Enumerates the spans of the included query, dropping every span that
    # overlaps a span of the excluded query in the same document.
    class SpanNotEnum < SpansEnum
      # query::  the SpanNotQuery being enumerated
      # reader:: handed to both sub-queries' +spans+ methods
      def initialize(query, reader)
        @query = query
        @incl_spans = @query.incl.spans(reader)
        @more_incl = true
        @excl_spans = @query.excl.spans(reader)
        @more_excl = @excl_spans.next? # excl_spans needs to be preset
      end

      # Advance to the next included span that no excluded span overlaps.
      # Returns false once the included spans are exhausted.
      def next?()
        if @more_incl # move to next incl
          @more_incl = @incl_spans.next?()
        end

        while @more_incl and @more_excl
          advance_excl()
          break unless overlaps_excl? # we found a match

          @more_incl = @incl_spans.next? # intersected: keep scanning
        end
        return @more_incl
      end

      # Skip the included spans to +target+ and position this enumeration
      # on the first span from there on that no excluded span overlaps.
      # Returns false if the included spans are exhausted.
      def skip_to(target)
        if @more_incl # skip incl
          @more_incl = @incl_spans.skip_to(target)
        end

        return false if not @more_incl

        advance_excl()
        return true unless overlaps_excl? # we found a match

        return next?() # scan to next match
      end

      def doc() @incl_spans.doc end
      def start() @incl_spans.start end
      def finish() @incl_spans.finish end

      def to_s()
        return "spans(#{@query})"
      end

      private

      # Move the excluded spans forward until they are no longer entirely
      # before the current included span: first skip to the included
      # span's document, then step past every excluded span in that
      # document that finishes at or before the included span starts.
      def advance_excl()
        if @more_excl and @incl_spans.doc > @excl_spans.doc # skip excl
          @more_excl = @excl_spans.skip_to(@incl_spans.doc)
        end

        while (@more_excl and # while excl is before
               @incl_spans.doc == @excl_spans.doc and
               @excl_spans.finish <= @incl_spans.start)
          @more_excl = @excl_spans.next? # increment excl
        end
      end

      # True when the current excluded span is in the same document as the
      # current included span and starts before the included span finishes.
      # Only meaningful straight after +advance_excl+.
      def overlaps_excl?()
        return (@more_excl and
                @incl_spans.doc == @excl_spans.doc and
                @incl_spans.finish > @excl_spans.start)
      end
    end

    # Rewrite both sub-queries. Returns +self+ when neither changed;
    # otherwise returns a clone holding the rewritten sub-queries.
    def rewrite(reader)
      copy = nil

      rewritten_incl = @incl.rewrite(reader)
      if rewritten_incl != @incl
        copy = self.clone()
        copy.incl = rewritten_incl
      end

      rewritten_excl = @excl.rewrite(reader)
      if rewritten_excl != @excl
        copy ||= self.clone()
        copy.excl = rewritten_excl
      end

      return copy || self
    end

    protected

    # Writers used by +rewrite+ to install rewritten sub-queries in a clone.
    # The original code called +incl=+ and +excl=+ without defining them,
    # which raised NoMethodError whenever a sub-query actually rewrote.
    def incl=(incl)
      @incl = incl
    end

    def excl=(excl)
      @excl = excl
    end
  end
end
|
@@ -0,0 +1,176 @@
|
|
1
|
+
module Ferret::Search::Spans
  # Matches the union of its clauses.
  class SpanOrQuery < SpanQuery

    # Construct a SpanOrQuery merging the provided clauses.
    #
    # Raises ArgumentError unless every clause reports the same field as
    # the first clause.
    def initialize(clauses)
      super()

      # copy the clauses into an array of our own, checking fields as we go
      @clauses = []
      @field = nil
      clauses.each_with_index do |clause, i|
        if i == 0 # the first clause decides the field
          @field = clause.field()
        elsif clause.field() != @field
          raise ArgumentError, "Clauses must have same field."
        end
        @clauses << clause
      end
    end

    # Give a clone (or dup) its own clauses array. Object#clone is shallow,
    # so without this +rewrite+ would write the rewritten clauses into the
    # array that the original query is still using.
    def initialize_copy(source)
      super
      @clauses = @clauses.dup
    end

    # Return the clauses whose spans are matched.
    def clauses() @clauses end

    # The field shared by all clauses (nil when there are no clauses).
    attr_reader :field

    # Return the terms of all clauses, concatenated in clause order.
    def terms()
      terms = []
      @clauses.each do |clause|
        terms += clause.terms
      end
      return terms
    end

    # Rewrite every clause. Returns +self+ when no clause changed; otherwise
    # returns a clone in which the changed clauses are replaced by their
    # rewritten form.
    def rewrite(reader)
      copy = nil
      @clauses.each_with_index do |clause, i|
        rewritten = clause.rewrite(reader)
        next if rewritten == clause
        copy ||= self.clone() # clause rewrote: must clone (once)
        copy.clauses[i] = rewritten
      end
      return copy || self
    end

    # Return a string of the form <tt>spanOr([<clause>, ...])</tt>.
    #
    # NOTE(review): <tt>field()</tt> with parentheses calls the attribute
    # reader, so each clause is given this query's own field and the +field+
    # argument is ignored. That may be unintended, but the output is left
    # as it was because callers may depend on it -- confirm before changing.
    def to_s(field = nil)
      buffer = "spanOr(["
      buffer << @clauses.map {|c| c.to_s(field()) }.join(", ")
      buffer << "])"
      return buffer
    end

    # Returns true iff +o+ is a query of the same class with equal clauses
    # and field.
    def eql?(o)
      return false if o.nil? or self.class() != o.class()

      return false if @clauses != o.clauses
      return false if @field != o.field

      return true
    end
    alias :== :eql?

    # Hash combining the clauses and field, the attributes +eql?+ compares.
    def hash()
      return @clauses.hash ^ @field.hash
    end

    # Priority queue of span enumerations ordered by document, then start
    # position, then finish position.
    class SpanQueue < Ferret::Utils::PriorityQueue
      def less_than(o1, o2)
        return o1.doc < o2.doc if o1.doc != o2.doc
        return o1.start < o2.start if o1.start != o2.start
        return o1.finish < o2.finish
      end
    end

    # Return an enumeration of the spans of this query found through +reader+.
    def spans(reader)
      if @clauses.size == 1 # optimize 1-clause case
        return @clauses[0].spans(reader)
      end

      return SpanOrEnum.new(self, reader)
    end

    # Merges the span enumerations of all clauses through a SpanQueue. The
    # current span is that of the enumeration on top of the queue.
    class SpanOrEnum < SpansEnum
      # query::  the SpanOrQuery being enumerated
      # reader:: handed to every clause's +spans+ method
      def initialize(query, reader)
        @query = query
        @queue = SpanQueue.new(query.clauses.size)
        @all = query.clauses.map {|c| c.spans(reader)}
        @first_time = true
      end

      # Advance to the next span. Returns false when every clause is
      # exhausted.
      def next?
        if @first_time # first time -- initialize
          fill_queue {|spans| spans.next? } # move each to its first entry
          return @queue.size() != 0
        end

        return false if @queue.size == 0 # all done

        if top().next? # move to next
          @queue.adjust_top()
          return true
        end

        @all.delete(@queue.pop()) # exhausted a clause

        return @queue.size() != 0
      end

      # Return the span enumeration currently on top of the queue.
      def top() return @queue.top() end

      # Skip every clause enumeration that is positioned before document
      # +target+ to +target+. Returns false when every clause is exhausted.
      def skip_to(target)
        if @first_time
          fill_queue {|spans| spans.skip_to(target) } # skip each spans in all
        else
          while @queue.size != 0 and top().doc < target
            if top().skip_to(target)
              @queue.adjust_top()
            else
              @all.delete(@queue.pop())
            end
          end
        end

        return @queue.size() != 0
      end

      def doc() top().doc() end
      def start() top().start() end
      def finish() top().finish() end

      # Return <tt>spans(<query>)@</tt> followed by +START+ before the first
      # call to +next?+ or +skip_to+, the current
      # <tt>doc:start-finish</tt> while spans remain, or +END+.
      def to_s()
        buffer = "spans(#{@query})@"
        if @first_time
          buffer << "START"
        else
          buffer << (@queue.size>0 ? ("#{doc}:#{start()}-#{finish}") : "END")
        end
        return buffer
      end

      private

      # First-call initialisation shared by +next?+ and +skip_to+. Yields
      # each clause enumeration to the block, which must position it and
      # return whether it has a current span. Enumerations that do are
      # pushed onto the queue; the others are dropped from @all.
      def fill_queue()
        @all.delete_if do |spans|
          alive = yield(spans)
          @queue.push(spans) if alive # build queue
          !alive
        end
        @first_time = false
      end
    end

  end
end
|