ferret 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,79 @@
1
+ module Ferret::Search::Spans
2
# Matches spans near the beginning of a field.
#
# Wraps another SpanQuery (+match+) and keeps only those of its spans whose
# finish position is less than or equal to +finish+.
class SpanFirstQuery < SpanQuery
  # Construct a SpanFirstQuery matching spans in +match+ whose finish
  # position is less than or equal to +finish+.
  #
  # match::  the SpanQuery whose spans are filtered
  # finish:: the maximum finish position permitted in a match
  def initialize(match, finish)
    super()
    @match = match
    @finish = finish
  end

  # Return the SpanQuery whose matches are filtered.
  def match() @match end

  # Return the maximum finish position permitted in a match.
  def finish() @finish end

  # Return the field of the wrapped query.
  def field() @match.field() end

  # Return the terms of the wrapped query.
  def terms() @match.terms() end

  # Return a string of the form "span_first(<match>, <finish>)". +field+ is
  # forwarded unchanged to the wrapped query's +to_s+.
  def to_s(field = nil)
    return "span_first(#{@match.to_s(field)}, #{finish})"
  end

  # Return an enumeration over the spans of the wrapped query, restricted to
  # those finishing at or before +finish+.
  def spans(reader)
    SpanFirstEnum.new(self, reader)
  end

  # Enumerates the spans of +query.match+, skipping every span whose finish
  # position is greater than +query.finish+.
  class SpanFirstEnum < SpansEnum
    def initialize(query, reader)
      super()
      @query = query
      @spans = @query.match.spans(reader)
    end

    # Advance to the next span that finishes early enough. Returns false
    # once the underlying enumeration is exhausted.
    def next?()
      while @spans.next?()                      # scan to next match
        return true if finish() <= @query.finish
      end
      return false
    end

    # Skip the underlying enumeration to +target+, then settle on the first
    # span from there on that finishes early enough.
    def skip_to(target)
      return false unless @spans.skip_to(target)
      return true if @spans.finish <= @query.finish   # there is a match
      return next?()                                  # scan to next match
    end

    def doc() @spans.doc() end
    def start() @spans.start() end
    def finish() @spans.finish() end

    def to_s() "spans(#{@query})" end
  end

  # Rewrite the wrapped query. Returns +self+ when the wrapped query did not
  # change; otherwise returns a clone of this query wrapping the rewritten
  # query, leaving the receiver untouched.
  def rewrite(reader)
    rewritten = @match.rewrite(reader)
    if rewritten != @match
      copy = self.clone()
      copy.match = rewritten     # needs the protected writer declared below
      return copy                # the wrapped clause rewrote
    end
    return self                  # no clauses rewrote
  end

  # Writer used by #rewrite to install the rewritten clause on a clone. The
  # original code called +match=+ without ever defining it, which raised
  # NoMethodError as soon as the wrapped query rewrote to a different query.
  attr_writer :match
  protected :match=
end
79
+ end
@@ -0,0 +1,108 @@
1
+ module Ferret::Search::Spans
2
# Matches spans which are near one another. One can specify _slop_, the
# maximum number of intervening unmatched positions, as well as whether
# matches are required to be in-order.
class SpanNearQuery < SpanQuery

  # Construct a SpanNearQuery. Matches spans matching a span from each
  # clause, with up to +slop+ total unmatched positions between them. When
  # +in_order+ is true, the spans from each clause must be ordered as in
  # +clauses+.
  #
  # Raises ArgumentError unless every clause reports the same field as the
  # first clause.
  def initialize(clauses, slop, in_order)
    super()
    # Copy the clauses into our own array so the caller's array is not
    # shared with this query.
    @clauses = Array.new(clauses.length)
    @field = nil
    clauses.each_with_index do |clause, i|
      if i == 0                              # first clause fixes the field
        @field = clause.field()
      elsif clause.field() != @field
        raise ArgumentError, "Clauses must have same field."
      end
      @clauses[i] = clause
    end

    @slop = slop
    @in_order = in_order
  end

  # Give every clone/dup its own clause array. Object#clone is shallow, so
  # without this #rewrite would write rewritten clauses into the array still
  # owned by the original query.
  def initialize_copy(other)
    super
    @clauses = other.clauses.dup
  end

  # Return the clauses whose spans are matched.
  def clauses() @clauses end

  # Return the maximum number of intervening unmatched positions permitted.
  def slop() @slop end

  # Return true if matches are required to be in-order.
  def in_order?() @in_order end

  # The field shared by all clauses (nil when there are no clauses).
  attr_reader :field

  # Return the concatenation of every clause's terms, in clause order.
  def terms()
    return @clauses.inject([]) { |collected, clause| collected + clause.terms }
  end

  # Return a string of the form "span_near([c1, c2, ...], slop, in_order)".
  # +field+ is forwarded unchanged to each clause's +to_s+.
  def to_s(field = nil)
    buffer = "span_near(["
    buffer << @clauses.map { |c| c.to_s(field) }.join(", ")
    # The original interpolated @stop, an instance variable that is never
    # assigned, so the slop was silently rendered as an empty string.
    buffer << "], #{@slop}, #{@in_order})"
    return buffer
  end

  # Return the span enumeration for this query against +reader+.
  def spans(reader)
    case @clauses.size()
    when 0                                   # optimize 0-clause case
      return SpanOrQuery.new(@clauses).spans(reader)
    when 1                                   # optimize 1-clause case
      return @clauses[0].spans(reader)
    end

    return NearSpansEnum.new(self, reader)
  end

  # Rewrite each clause. Returns +self+ when no clause changed; otherwise
  # returns a clone holding the rewritten clauses. The receiver's own clause
  # array is never modified (see #initialize_copy).
  def rewrite(reader)
    copy = nil
    @clauses.each_with_index do |clause, i|
      rewritten = clause.rewrite(reader)
      if rewritten != clause                 # clause rewrote: must clone
        copy = self.clone() if copy.nil?
        copy.clauses[i] = rewritten
      end
    end
    return copy.nil? ? self : copy
  end

  # Returns true iff +o+ is equal to this.
  def eql?(o)
    return false if (o == nil or self.class() != o.class())

    return false if (@in_order != o.in_order?)
    return false if (@slop != o.slop)
    return false if (@clauses != o.clauses)
    return false if (@field != o.field)

    return true
  end
  alias :== :eql?

  # Hash combining the clauses, slop, ordering flag and field, so that
  # queries that are eql? hash alike.
  def hash()
    result = @clauses.hash()
    result += @slop * 29
    result += (@in_order ? 1 : 0)
    result ^= @field.hash()
    return result
  end
end
108
+ end
@@ -0,0 +1,130 @@
1
+ module Ferret::Search::Spans
2
# Removes matches which overlap with another SpanQuery.
class SpanNotQuery < SpanQuery
  # Construct a SpanNotQuery matching spans from +incl+ which
  # have no overlap with spans from +excl+.
  #
  # Raises ArgumentError when +incl+ and +excl+ report different fields.
  def initialize(incl, excl)
    super()
    @incl = incl
    @excl = excl

    if incl.field != excl.field
      raise ArgumentError, "Clauses must have same field."
    end
  end

  # Return the SpanQuery whose matches are filtered.
  def incl() @incl end

  # Return the SpanQuery whose matches must not overlap those returned.
  def excl() @excl end

  # Return the field of the included query.
  def field() @incl.field() end

  # Return the terms of the included query (the excluded query's terms are
  # not reported).
  def terms() @incl.terms() end

  # Return a string of the form "span_not(<incl>, <excl>)". +field+ is
  # forwarded unchanged to both sub-queries.
  def to_s(field = nil)
    return "span_not(#{incl.to_s(field)}, #{excl.to_s(field)})"
  end

  # Return an enumeration of the included spans that do not overlap any
  # excluded span.
  def spans(reader)
    return SpanNotEnum.new(self, reader)
  end

  # Walks the included and excluded span enumerations side by side and
  # reports only included spans with no overlapping excluded span.
  class SpanNotEnum < SpansEnum
    def initialize(query, reader)
      @query = query
      @incl_spans = @query.incl.spans(reader)
      @more_incl = true
      @excl_spans = @query.excl.spans(reader)
      @more_excl = @excl_spans.next?         # excl_spans needs to be preset
    end

    # Advance to the next included span that no excluded span overlaps.
    # Returns false once the included enumeration is exhausted.
    def next?()
      @more_incl = @incl_spans.next?() if @more_incl   # move to next incl

      while @more_incl && @more_excl
        if @incl_spans.doc > @excl_spans.doc           # skip excl
          @more_excl = @excl_spans.skip_to(@incl_spans.doc)
        end

        advance_excl_past_incl_start()
        break if no_overlap?()                         # we found a match

        @more_incl = @incl_spans.next?         # intersected: keep scanning
      end
      return @more_incl
    end

    # Skip the included enumeration to +target+ and settle on the first
    # included span from there on that no excluded span overlaps.
    def skip_to(target)
      @more_incl = @incl_spans.skip_to(target) if @more_incl   # skip incl
      return false unless @more_incl

      if @more_excl && @incl_spans.doc > @excl_spans.doc       # skip excl
        @more_excl = @excl_spans.skip_to(@incl_spans.doc)
      end

      advance_excl_past_incl_start()
      return true if no_overlap?()                     # we found a match

      return next?()                                   # scan to next match
    end

    def doc() @incl_spans.doc end
    def start() @incl_spans.start end
    def finish() @incl_spans.finish end

    def to_s()
      return "spans(#{@query})"
    end

    private

    # While the excluded span is in the same document and finishes at or
    # before the start of the current included span, move the excluded
    # enumeration forward.
    def advance_excl_past_incl_start()
      while (@more_excl &&
             @incl_spans.doc == @excl_spans.doc &&
             @excl_spans.finish <= @incl_spans.start)
        @more_excl = @excl_spans.next?                 # increment excl
      end
    end

    # True when the current included span is a match: the excluded
    # enumeration is exhausted, sits in another document, or starts at or
    # after the included span's finish.
    def no_overlap?()
      return (!@more_excl ||
              @incl_spans.doc != @excl_spans.doc ||
              @incl_spans.finish <= @excl_spans.start)
    end
  end

  # Rewrite both sub-queries. Returns +self+ when neither changed; otherwise
  # returns a clone carrying the rewritten sub-queries, leaving the receiver
  # untouched.
  def rewrite(reader)
    copy = nil

    rewritten_incl = @incl.rewrite(reader)
    if rewritten_incl != @incl
      copy = self.clone()
      copy.incl = rewritten_incl
    end

    rewritten_excl = @excl.rewrite(reader)
    if rewritten_excl != @excl
      copy = self.clone() if copy.nil?
      copy.excl = rewritten_excl
    end

    return copy.nil? ? self : copy
  end

  # Writers used by #rewrite to install rewritten clauses on a clone. The
  # original code called +incl=+ / +excl=+ without ever defining them, which
  # raised NoMethodError as soon as either sub-query rewrote.
  attr_writer :incl, :excl
  protected :incl=, :excl=

end
130
+ end
@@ -0,0 +1,176 @@
1
+ module Ferret::Search::Spans
2
# Matches the union of its clauses.
class SpanOrQuery < SpanQuery

  # Construct a SpanOrQuery merging the provided clauses.
  #
  # Raises ArgumentError unless every clause reports the same field as the
  # first clause.
  def initialize(clauses)
    super()

    # Copy the clauses into our own array so the caller's array is not
    # shared with this query.
    @clauses = Array.new(clauses.length)
    @field = nil
    clauses.each_with_index do |clause, i|
      if i == 0                              # first clause fixes the field
        @field = clause.field()
      elsif clause.field() != @field
        raise ArgumentError, "Clauses must have same field."
      end
      @clauses[i] = clause
    end
  end

  # Give every clone/dup its own clause array. Object#clone is shallow, so
  # without this #rewrite would write rewritten clauses into the array still
  # owned by the original query.
  def initialize_copy(other)
    super
    @clauses = other.clauses.dup
  end

  # Return the clauses whose spans are matched.
  def clauses() @clauses end

  # The field shared by all clauses (nil when there are no clauses).
  attr_reader :field

  # Return the concatenation of every clause's terms, in clause order.
  def terms()
    return @clauses.inject([]) { |collected, clause| collected + clause.terms }
  end

  # Rewrite each clause. Returns +self+ when no clause changed; otherwise
  # returns a clone holding the rewritten clauses. The receiver's own clause
  # array is never modified (see #initialize_copy).
  def rewrite(reader)
    copy = nil
    @clauses.each_with_index do |clause, i|
      rewritten = clause.rewrite(reader)
      if rewritten != clause                 # clause rewrote: must clone
        copy = self.clone() if copy.nil?
        copy.clauses[i] = rewritten
      end
    end
    return copy.nil? ? self : copy
  end

  # Return a string of the form "spanOr([c1, c2, ...])". +field+ is
  # forwarded unchanged to each clause's +to_s+.
  def to_s(field = nil)
    buffer = "spanOr(["
    # The original wrote field() here, which calls the reader method and so
    # ignored the +field+ argument entirely.
    buffer << @clauses.map { |c| c.to_s(field) }.join(", ")
    buffer << "])"
    return buffer
  end

  # Returns true iff +o+ is a SpanOrQuery with equal clauses and field.
  def eql?(o)
    return false if (o == nil or self.class() != o.class())

    return false if (@clauses != o.clauses)
    return false if (@field != o.field)

    return true
  end
  alias :== :eql?

  def hash()
    return @clauses.hash ^ @field.hash
  end

  # Priority queue of span enumerations ordered by document, then start
  # position, then finish position.
  class SpanQueue < Ferret::Utils::PriorityQueue
    def less_than(o1, o2)
      return o1.doc < o2.doc if o1.doc != o2.doc
      return o1.start < o2.start if o1.start != o2.start
      return o1.finish < o2.finish
    end
  end

  # Return the span enumeration for this query against +reader+.
  def spans(reader)
    if @clauses.size == 1                    # optimize 1-clause case
      return @clauses[0].spans(reader)
    end

    return SpanOrEnum.new(self, reader)
  end

  # Merges the span enumerations of all clauses through a SpanQueue; the
  # current span is always the one at the top of the queue.
  class SpanOrEnum < SpansEnum
    def initialize(query, reader)
      @query = query
      @queue = SpanQueue.new(query.clauses.size)
      @all = query.clauses.map { |c| c.spans(reader) }
      @first_time = true
    end

    # Advance to the next span in the merged order. Returns false once
    # every clause enumeration is exhausted.
    def next?
      if @first_time                         # first time -- initialize
        @all.delete_if do |spans|
          if spans.next?                     # move to first entry
            @queue.push(spans)               # build queue
            false
          else
            true                             # clause has no spans: drop it
          end
        end
        @first_time = false
        return @queue.size() != 0
      end

      return false if @queue.size == 0       # all done

      if top().next?                         # move to next
        @queue.adjust_top()
        return true
      end

      @all.delete(@queue.pop())              # exhausted a clause

      return @queue.size() != 0
    end

    # The clause enumeration currently at the top of the queue.
    def top() return @queue.top() end

    # Skip every clause enumeration that is behind +target+ to it. Returns
    # false when no clause has a span at or after +target+.
    def skip_to(target)
      if @first_time
        @all.delete_if do |spans|
          if spans.skip_to(target)           # skip each spans in all
            @queue.push(spans)               # build queue
            false
          else
            true                             # nothing at/after target: drop
          end
        end
        @first_time = false
      else
        while @queue.size != 0 && top().doc < target
          if top().skip_to(target)
            @queue.adjust_top()
          else
            @all.delete(@queue.pop())
          end
        end
      end

      return @queue.size() != 0
    end

    def doc() top().doc() end
    def start() top().start() end
    def finish() top().finish() end

    # "spans(<query>)@START" before the first advance, "@END" once the
    # queue is empty, otherwise "@<doc>:<start>-<finish>".
    def to_s()
      buffer = "spans(#{@query})@"
      if @first_time
        buffer << "START"
      else
        buffer << (@queue.size>0 ? ("#{doc}:#{start()}-#{finish}") : "END")
      end
      return buffer
    end
  end

end
176
+ end