ferret 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,79 @@
1
module Ferret::Search::Spans
  # Matches spans near the beginning of a field.
  class SpanFirstQuery < SpanQuery
    # Construct a SpanFirstQuery matching spans in +match+ whose finish
    # position is less than or equal to +finish+.
    #
    # match::  the span query whose spans are filtered
    # finish:: the largest finish position a span may report and still match
    def initialize(match, finish)
      super()
      @match = match
      @finish = finish
    end

    # Return the SpanQuery whose matches are filtered.
    def match() @match end

    # Return the maximum finish position permitted in a match.
    def finish() @finish end

    # Return the field reported by the wrapped query.
    def field() @match.field() end

    # Return the terms reported by the wrapped query.
    def terms() @match.terms() end

    # Render the query as "span_first(<match>, <finish>)"; +field+ is passed
    # through to the wrapped query's +to_s+.
    def to_s(field = nil)
      return "span_first(#{@match.to_s(field)}, #{finish})"
    end

    # Return an enumeration over the wrapped query's spans in +reader+,
    # restricted to those finishing at or before +finish+.
    def spans(reader)
      SpanFirstEnum.new(self, reader)
    end

    # Enumeration that forwards to the wrapped query's spans and drops every
    # span whose finish position exceeds the owning query's limit.
    class SpanFirstEnum < SpansEnum
      def initialize(query, reader)
        super()
        @query = query
        @spans = @query.match.spans(reader)
      end

      # Advance the wrapped spans until one finishes within the limit.
      # Returns false once the wrapped spans are exhausted.
      def next?()
        while (@spans.next?()) # scan to next match
          return true if (finish() <= @query.finish)
        end
        return false
      end

      # Skip the wrapped spans to +target+; if the span landed on is past the
      # limit, keep scanning with next?.
      def skip_to(target)
        return false if not @spans.skip_to(target)
        return true if (@spans.finish <= @query.finish) # there is a match
        return next?() # scan to next match
      end

      def doc() @spans.doc() end
      def start() @spans.start() end
      def finish() @spans.finish() end

      def to_s() "spans(#{@query})" end
    end

    # Rewrite the wrapped query against +reader+. Returns +self+ when the
    # wrapped query did not change, otherwise a clone of this query that
    # wraps the rewritten query.
    def rewrite(reader)
      rewritten = @match.rewrite(reader)
      if (rewritten != @match)
        copy = self.clone()
        copy.match = rewritten # needs the protected writer declared below
        return copy # the wrapped query rewrote
      end
      return self # nothing rewrote
    end

    # Writer used only by rewrite to retarget a clone. The original code
    # called +match=+ without ever defining it.
    attr_writer :match
    protected :match=
  end
end
@@ -0,0 +1,108 @@
1
module Ferret::Search::Spans
  # Matches spans which are near one another. One can specify _slop_, the
  # maximum number of intervening unmatched positions, as well as whether
  # matches are required to be in-order.
  class SpanNearQuery < SpanQuery

    # Construct a SpanNearQuery. Matches spans matching a span from each
    # clause, with up to +slop+ total unmatched positions between them. When
    # +in_order+ is true, the spans from each clause must be ordered as in
    # +clauses+.
    #
    # Raises ArgumentError when a clause reports a different field from the
    # first clause.
    def initialize(clauses, slop, in_order)
      super()
      # keep a private copy of the clause list
      @clauses = Array.new(clauses.length)
      @field = nil
      clauses.each_index do |i|
        clause = clauses[i]
        if i == 0 # the first clause decides the field
          @field = clause.field()
        elsif clause.field() != @field
          raise ArgumentError, "Clauses must have same field."
        end
        @clauses[i] = clause
      end

      @slop = slop
      @in_order = in_order
    end

    # Give every clone/dup its own clause array. rewrite stores rewritten
    # clauses into its clone; with the default shallow copy those writes
    # would also land in the original query's array.
    def initialize_copy(source)
      super
      @clauses = @clauses.dup
    end

    # Return the clauses whose spans are matched.
    def clauses() @clauses end

    # Return the maximum number of intervening unmatched positions permitted.
    def slop() @slop end

    # Return true if matches are required to be in-order.
    def in_order?() @in_order end

    attr_reader :field

    # Return the terms of all clauses, concatenated in clause order.
    def terms()
      all_terms = []
      @clauses.each { |clause| all_terms.concat(clause.terms) }
      return all_terms
    end

    # Render the query as "span_near([<clauses>], <slop>, <in_order>)".
    def to_s(field = nil)
      buffer = "span_near(["
      buffer << @clauses.map {|c| c.to_s(field)}.join(", ")
      # was "#{@stop}", an unset instance variable that rendered as ""
      buffer << "], #{@slop}, #{@in_order})"
      return buffer
    end

    # Return the span enumeration for this query over +reader+.
    def spans(reader)
      if (@clauses.size() == 0) # optimize 0-clause case
        return SpanOrQuery.new(@clauses).spans(reader)
      end

      if (@clauses.size() == 1) # optimize 1-clause case
        return @clauses[0].spans(reader)
      end

      return NearSpansEnum.new(self, reader)
    end

    # Rewrite every clause against +reader+. Returns +self+ when no clause
    # changed; otherwise returns a clone holding the rewritten clauses. The
    # clone owns its clause array (see initialize_copy), so this query is
    # left untouched.
    def rewrite(reader)
      copy = nil
      @clauses.each_index do |i|
        clause = @clauses[i]
        query = clause.rewrite(reader)
        if (query != clause) # clause rewrote: must clone
          copy = self.clone() if (copy == nil)
          copy.clauses[i] = query
        end
      end
      if (copy != nil)
        return copy # some clauses rewrote
      else
        return self # no clauses rewrote
      end
    end

    # Returns true iff +o+ is equal to this.
    def eql?(o)
      return false if (o.nil? or self.class() != o.class())

      return false if (@in_order != o.in_order?)
      return false if (@slop != o.slop)
      return false if (@clauses != o.clauses)
      return false if (@field != o.field)

      return true
    end
    alias :== :eql?

    # Hash built from the same attributes eql? compares.
    def hash()
      result = @clauses.hash()
      result += @slop * 29
      result += (@in_order ? 1 : 0)
      result ^= @field.hash()
      return result
    end
  end
end
@@ -0,0 +1,130 @@
1
module Ferret::Search::Spans
  # Removes matches which overlap with another SpanQuery.
  class SpanNotQuery < SpanQuery
    # Construct a SpanNotQuery matching spans from +incl+ which
    # have no overlap with spans from +excl+.
    #
    # Raises ArgumentError when the two queries report different fields.
    def initialize(incl, excl)
      super()
      @incl = incl
      @excl = excl

      if incl.field != excl.field
        raise ArgumentError, "Clauses must have same field."
      end
    end

    # Return the SpanQuery whose matches are filtered.
    def incl() @incl end

    # Return the SpanQuery whose matches must not overlap those returned.
    def excl() @excl end

    # Return the field reported by the included query.
    def field() @incl.field() end

    # Return the terms of the included query only.
    def terms() @incl.terms() end

    # Render the query as "span_not(<incl>, <excl>)".
    def to_s(field = nil)
      return "span_not(#{incl.to_s(field)}, #{excl.to_s(field)})"
    end

    # Return the span enumeration for this query over +reader+.
    def spans(reader)
      return SpanNotEnum.new(self, reader)
    end

    # Enumerates the included query's spans, skipping every span that
    # intersects a span of the excluded query in the same document.
    class SpanNotEnum < SpansEnum
      def initialize(query, reader)
        super() # consistent with SpanFirstEnum, which also chains up
        @query = query
        @incl_spans = @query.incl.spans(reader)
        @more_incl = true
        @excl_spans = @query.excl.spans(reader)
        @more_excl = @excl_spans.next? # excl_spans needs to be preset
      end

      # Move to the next included span that no excluded span intersects.
      # Returns false once the included spans are exhausted.
      def next?()
        if (@more_incl) # move to next incl
          @more_incl = @incl_spans.next?()
        end

        while (@more_incl and @more_excl)
          if (@incl_spans.doc > @excl_spans.doc) # skip excl
            @more_excl = @excl_spans.skip_to(@incl_spans.doc)
          end

          while (@more_excl and # while excl is before
                 @incl_spans.doc == @excl_spans.doc and
                 @excl_spans.finish <= @incl_spans.start)
            @more_excl = @excl_spans.next? # increment excl
          end

          if (not @more_excl or # if no intersection
              @incl_spans.doc != @excl_spans.doc or
              @incl_spans.finish <= @excl_spans.start)
            break # we found a match
          end

          @more_incl = @incl_spans.next? # intersected: keep scanning
        end
        return @more_incl
      end

      # Skip the included spans to +target+, then apply the same exclusion
      # test as next?; falls back to next? when the span landed on
      # intersects an excluded span.
      def skip_to(target)
        if @more_incl # skip incl
          @more_incl = @incl_spans.skip_to(target)
        end

        return false if not @more_incl

        if (@more_excl and @incl_spans.doc > @excl_spans.doc) # skip excl
          @more_excl = @excl_spans.skip_to(@incl_spans.doc)
        end

        while (@more_excl and # while excl is before
               @incl_spans.doc == @excl_spans.doc and
               @excl_spans.finish <= @incl_spans.start)
          @more_excl = @excl_spans.next? # increment excl
        end

        if (not @more_excl or # if no intersection
            @incl_spans.doc != @excl_spans.doc or
            @incl_spans.finish <= @excl_spans.start)
          return true # we found a match
        end

        return next?() # scan to next match
      end

      def doc() @incl_spans.doc end
      def start() @incl_spans.start end
      def finish() @incl_spans.finish end

      def to_s()
        return "spans(#{@query})"
      end
    end

    # Rewrite both sub-queries against +reader+. Returns +self+ when neither
    # changed, otherwise a clone carrying the rewritten sub-queries.
    def rewrite(reader)
      copy = nil

      rewritten_incl = @incl.rewrite(reader)
      if (rewritten_incl != @incl)
        copy = self.clone()
        copy.incl = rewritten_incl
      end

      rewritten_excl = @excl.rewrite(reader)
      if (rewritten_excl != @excl)
        copy = self.clone() if (copy == nil)
        copy.excl = rewritten_excl
      end

      if (copy != nil)
        return copy # some clauses rewrote
      else
        return self # no clauses rewrote
      end
    end

    # Writers used only by rewrite to retarget a clone. The original code
    # called +incl=+ and +excl=+ without ever defining them.
    attr_writer :incl, :excl
    protected :incl=, :excl=

  end
end
@@ -0,0 +1,176 @@
1
module Ferret::Search::Spans
  # Matches the union of its clauses.
  class SpanOrQuery < SpanQuery

    # Construct a SpanOrQuery merging the provided clauses.
    #
    # Raises ArgumentError when a clause reports a different field from the
    # first clause.
    def initialize(clauses)
      super()

      # keep a private copy of the clause list
      @clauses = Array.new(clauses.length)
      @field = nil
      clauses.each_index do |i|
        clause = clauses[i]
        if i == 0 # the first clause decides the field
          @field = clause.field()
        elsif clause.field() != @field
          raise ArgumentError, "Clauses must have same field."
        end
        @clauses[i] = clause
      end
    end

    # Give every clone/dup its own clause array. rewrite stores rewritten
    # clauses into its clone; with the default shallow copy those writes
    # would also land in the original query's array.
    def initialize_copy(source)
      super
      @clauses = @clauses.dup
    end

    # Return the clauses whose spans are matched.
    def clauses() @clauses end

    attr_reader :field

    # Return the terms of all clauses, concatenated in clause order.
    def terms()
      all_terms = []
      @clauses.each { |clause| all_terms.concat(clause.terms) }
      return all_terms
    end

    # Rewrite every clause against +reader+. Returns +self+ when no clause
    # changed; otherwise returns a clone holding the rewritten clauses. The
    # clone owns its clause array (see initialize_copy), so this query is
    # left untouched.
    def rewrite(reader)
      copy = nil
      @clauses.each_index do |i|
        clause = @clauses[i]
        query = clause.rewrite(reader)
        if (query != clause) # clause rewrote: must clone
          copy = self.clone() if (copy == nil)
          copy.clauses[i] = query
        end
      end
      if (copy != nil)
        return copy # some clauses rewrote
      else
        return self # no clauses rewrote
      end
    end

    # Render the query as "spanOr([<clauses>])". The +field+ argument is
    # handed to each clause, as the other span queries do; the original
    # passed field() (this query's own field) and ignored the argument.
    def to_s(field = nil)
      buffer = "spanOr(["
      buffer << @clauses.map {|c| c.to_s(field) }.join(", ")
      buffer << "])"
      return buffer
    end

    # Returns true iff +o+ is a SpanOrQuery with equal clauses and field.
    def eql?(o)
      return false if (o.nil? or self.class() != o.class())

      return false if (@clauses != o.clauses)
      return false if (@field != o.field)

      return true
    end
    alias :== :eql?

    def hash()
      return @clauses.hash ^ @field.hash
    end

    # Priority queue ordering span enumerations by document, then start
    # position, then finish position.
    class SpanQueue < Ferret::Utils::PriorityQueue
      def less_than(o1, o2)
        return o1.doc < o2.doc if (o1.doc != o2.doc)
        return o1.start < o2.start if (o1.start != o2.start)
        return o1.finish < o2.finish
      end
    end

    # Return the span enumeration for this query over +reader+.
    def spans(reader)
      if (@clauses.size == 1) # optimize 1-clause case
        return @clauses[0].spans(reader)
      end

      return SpanOrEnum.new(self, reader)
    end

    # Merges the span enumerations of all clauses through a SpanQueue; the
    # queue's top element is the current span.
    class SpanOrEnum < SpansEnum
      def initialize(query, reader)
        super() # consistent with SpanFirstEnum, which also chains up
        @query = query
        @queue = SpanQueue.new(query.clauses.size)
        @all = query.clauses.map {|c| c.spans(reader)}
        @first_time = true
      end

      # Advance to the next span. The first call positions every clause on
      # its first span and builds the queue; later calls advance the top
      # enumeration and drop it once exhausted.
      def next?
        if (@first_time) # first time -- initialize
          prime_queue {|spans| spans.next? }
          return @queue.size() != 0
        end

        return false if @queue.size == 0 # all done

        if top().next? # move to next
          @queue.adjust_top()
          return true
        end

        @all.delete(@queue.pop()) # exhausted a clause

        return @queue.size() != 0
      end

      # Return the enumeration currently at the head of the queue.
      def top() return @queue.top() end

      # Skip to +target+. On first use every clause is skipped and the queue
      # is built; afterwards the top enumeration is skipped repeatedly while
      # its document is below +target+.
      def skip_to(target)
        if (@first_time)
          prime_queue {|spans| spans.skip_to(target) }
        else
          while (@queue.size != 0 and top().doc < target)
            if (top().skip_to(target))
              @queue.adjust_top()
            else
              @all.delete(@queue.pop())
            end
          end
        end

        return @queue.size() != 0
      end

      def doc() top().doc() end
      def start() top().start() end
      def finish() top().finish() end

      # Render as "spans(<query>)@" followed by START, END, or the current
      # "doc:start-finish" position.
      def to_s()
        buffer = "spans(#{@query})@"
        if @first_time
          buffer << "START"
        else
          buffer << (@queue.size>0 ? ("#{doc}:#{start()}-#{finish}") : "END")
        end
        return buffer
      end

      private

      # Position every clause enumeration using the given block. Those for
      # which it returns true are queued; the rest are removed from @all.
      # Also clears the first-time flag.
      def prime_queue
        @all.delete_if do |spans|
          if yield(spans)
            @queue.push(spans) # build queue
            false
          else
            true
          end
        end
        @first_time = false
      end
    end

  end
end