ferret 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,47 @@
module Ferret::Search
  # A Query that matches documents containing terms with a specified prefix. A
  # PrefixQuery is built by QueryParser for input like +app*+.
  class PrefixQuery < Query
    # The prefix object handed to the constructor. Its +field+ and +text+ are
    # what candidate terms are compared against.
    attr_reader :prefix

    # Constructs a query for terms starting with +prefix+.
    #
    # prefix:: an object responding to +field+ and +text+ (presumably a
    #          Ferret::Index::Term -- confirm against callers)
    def initialize(prefix)
      super()
      @prefix = prefix
    end

    # Expands this query into a BooleanQuery containing one SHOULD TermQuery
    # for every term that +reader+ enumerates, starting from the prefix, for
    # as long as the term is in the prefix's field and its text begins with
    # the prefix's text. Each generated TermQuery inherits this query's boost.
    #
    # reader:: must respond to +terms_from+ and return an enumerator offering
    #          +term+, +next?+ and +close+
    #
    # Returns the BooleanQuery (it has no clauses when nothing matches).
    def rewrite(reader)
      bq = BooleanQuery.new(true)
      enumerator = reader.terms_from(@prefix)
      begin
        prefix_text = @prefix.text
        prefix_length = prefix_text.length
        prefix_field = @prefix.field
        # do-while: the enumerator's current term is inspected before the
        # first call to next?
        begin
          term = enumerator.term
          # stop at the first term that is missing, in another field, or no
          # longer carries the prefix
          if term.nil? ||
             term.field != prefix_field ||
             term.text[0, prefix_length] != prefix_text
            break
          end
          tq = TermQuery.new(term)                       # found a match
          tq.boost = boost()                             # set the boost
          bq.add_query(tq, BooleanClause::Occur::SHOULD) # add to query
        end while enumerator.next?
      ensure
        # always release the enumerator, even if building the query raised
        enumerator.close
      end
      bq
    end

    # Prints a user-readable version of this query.
    #
    # f:: the default field; the field prefix is omitted from the output when
    #     the query's field equals +f+. Defaults to nil (matching
    #     Query#to_s) so that the query can be used in string interpolation.
    def to_s(f = nil)
      buffer = ""
      buffer << "#{@prefix.field}:" if @prefix.field != f
      buffer << "#{@prefix.text}*"
      buffer << "^#{boost()}" if boost() != 1.0
      buffer
    end
  end
end
@@ -0,0 +1,111 @@
require 'set' # merge_boolean_queries collects clauses in a Set

module Ferret::Search
  # The abstract base class for queries.
  # Instantiable subclasses are:
  # * TermQuery
  # * MultiTermQuery
  # * BooleanQuery
  # * WildcardQuery
  # * PhraseQuery
  # * PrefixQuery
  # * MultiPhraseQuery
  # * FuzzyQuery
  # * RangeQuery
  # * Span::SpanQuery
  #
  # A parser for queries is contained in:
  # * Ferret::QueryParser::QueryParser
  #
  class Query
    # Documents matching this query clause will (in addition to the normal
    # weightings) have their score multiplied by the boost factor. It is
    # 1.0 by default.
    attr_accessor :boost

    # Sets the boost to its default of 1.0. Subclasses that define their own
    # constructor need to call +super()+ to get that default.
    def initialize
      @boost = 1.0
    end

    # Prints a query to a string, with +field+ as the default field for
    # terms. The representation used is one that is supposed to be readable
    # by Ferret::QueryParser::QueryParser. However, there are the following
    # limitations:
    # * If the query was created by the parser, the printed representation
    #   may not be exactly what was parsed. For example, characters that need
    #   to be escaped will be represented without the required backslash.
    # * Some of the more complicated queries (e.g. span queries)
    #   don't have a representation that can be parsed by QueryParser.
    #
    # Abstract here: raises NotImplementedError unless overridden.
    def to_s(field = nil)
      raise NotImplementedError
    end

    # Expert: Constructs an appropriate Weight implementation for this query.
    #
    # Only implemented by primitive queries, which re-write to themselves.
    # Abstract here: raises NotImplementedError unless overridden.
    def create_weight(searcher)
      raise NotImplementedError
    end

    # Expert: Constructs and initializes a Weight for a top-level query.
    #
    # The query is first rewritten through +searcher+, a weight is created
    # for the rewritten query, and that weight is normalized with the query
    # norm which this query's similarity computes from the weight's sum of
    # squared weights.
    #
    # Returns the normalized weight.
    def weight(searcher)
      query = searcher.rewrite(self)
      w = query.create_weight(searcher)
      sum = w.sum_of_squared_weights
      norm = similarity(searcher).query_norm(sum)
      w.normalize(norm)
      w
    end

    # Expert: called to re-write queries into primitive queries. The default
    # implementation returns the query itself.
    def rewrite(reader)
      self
    end

    # Expert: called when re-writing queries under MultiSearcher.
    #
    # Only implemented by derived queries, with no #create_weight()
    # implementation.
    #
    # The default implementation accepts only queries that are equal to this
    # one and returns +self+; it raises ArgumentError as soon as it meets a
    # query that differs.
    def combine(queries)
      queries.each do |query|
        if self != query
          raise ArgumentError, "cannot combine a query with a query that differs from it"
        end
      end
      self
    end

    # Expert: adds all terms occurring in this query to the terms set.
    # Abstract here: raises NotImplementedError unless overridden.
    def extract_terms(terms)
      raise NotImplementedError
    end

    # Expert: merges the clauses of a set of BooleanQuery's into a single
    # BooleanQuery.
    #
    # A utility for use by #combine() implementations.
    #
    # Clauses are gathered in a Set, so duplicate clauses are added to the
    # result only once. The result takes its coord_disabled setting from the
    # first query, or false when +queries+ is empty.
    def merge_boolean_queries(queries)
      all_clauses = Set.new
      queries.each do |query|
        query.clauses.each do |clause|
          all_clauses << clause
        end
      end

      coord_disabled = queries.size == 0 ? false : queries[0].coord_disabled?
      result = BooleanQuery.new(coord_disabled)
      all_clauses.each do |clause|
        result << clause
      end
      result
    end

    # Expert: Returns the Similarity implementation to be used for this
    # query. Subclasses may override this method to specify their own
    # Similarity implementation, perhaps one that delegates through that of
    # the Searcher. By default the Searcher's Similarity implementation is
    # returned.
    def similarity(searcher)
      searcher.similarity
    end
  end
end
@@ -0,0 +1,51 @@
module Ferret::Search
  require 'monitor'

  # Constrains search results to only match those which also match a provided
  # query. Results are cached, so that searches after the first on the same
  # index using this filter are much faster.
  #
  # This could be used, for example, with a RangeQuery on a suitably formatted
  # date field to implement date filtering. One could re-use a single
  # QueryFilter that matches, e.g., only documents modified within the last
  # week. The QueryFilter and RangeQuery would only need to be reconstructed
  # once per day.
  class QueryFilter < Filter

    # Constructs a filter which only matches documents matching
    # +query+.
    #
    # The per-reader cache is created here rather than on first use: every
    # later access is guarded by the cache's own monitor, and creating it
    # lazily would leave an unguarded check-then-create step in #bits.
    def initialize(query)
      @query = query
      @cache = Ferret::Utils::WeakKeyHash.new.extend(MonitorMixin)
    end

    # Returns the bit vector of documents in +reader+ that match the query.
    #
    # The vector is looked up in the cache under +reader+ first. On a miss
    # the query is run against +reader+ with a new IndexSearcher, a bit is set
    # for every hit, and the vector is stored in the cache before it is
    # returned. The search itself runs outside the monitor, so concurrent
    # first calls for the same reader may each run it; whichever stores last
    # is the one the cache keeps.
    def bits(reader)
      cached = @cache.synchronize { @cache[reader] } # check cache
      return cached if cached

      bits = Ferret::Utils::BitVector.new

      IndexSearcher.new(reader).search_each(@query) do |doc, _score|
        bits.set(doc) # set bit for hit
      end

      @cache.synchronize { @cache[reader] = bits } # update cache

      bits
    end

    # Returns "QueryFilter(...)" wrapping the string form of the query.
    def to_s
      "QueryFilter(#{@query})"
    end
  end
end
@@ -0,0 +1,103 @@
module Ferret::Search
  # A Filter that restricts search results to a range of values in a given
  # field.
  #
  # This code borrows heavily from RangeQuery, but is implemented as a Filter.
  class RangeFilter < Filter
    include Ferret::Index

    # field_name::    The field this range applies to
    # lower_term::    The lower bound on this range
    # upper_term::    The upper bound on this range
    # include_lower:: Does this range include the lower bound?
    # include_upper:: Does this range include the upper bound?
    #
    # Raises ArgumentError when both bounds are nil, when a nil bound is
    # marked inclusive, or when +upper_term+ is less than +lower_term+.
    def initialize(field_name, lower_term, upper_term, include_lower, include_upper)
      @field_name = field_name
      @lower_term = lower_term
      @upper_term = upper_term
      @include_lower = include_lower
      @include_upper = include_upper

      if lower_term.nil? && upper_term.nil?
        raise ArgumentError, "At least one value must be non-nil"
      end
      if include_lower && lower_term.nil?
        raise ArgumentError, "The lower bound must be non-nil to be inclusive"
      end
      if include_upper && upper_term.nil?
        raise ArgumentError, "The upper bound must be non-nil to be inclusive"
      end
      if upper_term && lower_term && upper_term < lower_term
        raise ArgumentError, "The lower bound must less than the upper bound"
      end
    end

    # Constructs a filter for field +field_name+ matching less than or equal to
    # +upper_term+. Pass +include_upper+ as false to exclude +upper_term+
    # itself.
    def self.new_less(field_name, upper_term, include_upper = true)
      RangeFilter.new(field_name, nil, upper_term, false, include_upper)
    end

    # Constructs a filter for field +field_name+ matching greater than or equal
    # to +lower_term+. Pass +include_lower+ as false to exclude +lower_term+
    # itself.
    def self.new_more(field_name, lower_term, include_lower = true)
      RangeFilter.new(field_name, lower_term, nil, include_lower, false)
    end

    # Returns a BitVector with true for documents which should be permitted in
    # search results, and false for those that should not.
    #
    # Terms are enumerated from +reader+ starting at the lower bound (or at
    # the empty string when there is no lower bound). Enumeration stops at the
    # first term outside the field or past the upper bound. Every document
    # listed for an accepted term has its bit set.
    def bits(reader)
      bits = Ferret::Utils::BitVector.new
      term_enum = reader.terms_from(Term.new(@field_name, @lower_term || ""))

      begin
        # nothing to enumerate at all
        return bits if term_enum.term.nil?

        # stays true until the first term strictly above an exclusive lower
        # bound has been seen
        check_lower = !@include_lower

        term_docs = reader.term_docs
        begin
          # do-while: the enumerator's current term is examined before the
          # first call to next?
          begin
            term = term_enum.term
            break if term.nil? || term.field != @field_name

            if !check_lower || @lower_term.nil? || term.text > @lower_term
              check_lower = false
              if @upper_term
                compare = @upper_term <=> term.text
                # if beyond the upper term, or is exclusive and
                # this is equal to the upper term, break out
                break if compare < 0 || (!@include_upper && compare == 0)
              end

              # we have a good term, find the docs
              term_docs.seek(term_enum)
              bits.set(term_docs.doc) while term_docs.next?
            end
          end while term_enum.next?
        ensure
          term_docs.close
        end
      ensure
        term_enum.close
      end

      bits
    end

    # Returns a string of the form <tt>field:[lower-upper]</tt>. Square
    # brackets mark an inclusive bound and curly braces an exclusive one (the
    # same bracket convention RangeQuery#to_s uses); a nil bound is left
    # empty.
    def to_s
      buffer = "#{@field_name}:"
      # the ternaries are parenthesised because << binds tighter than ?:
      buffer << (@include_lower ? "[" : "{")
      buffer << @lower_term if @lower_term
      buffer << "-"
      buffer << @upper_term if @upper_term
      buffer << (@include_upper ? "]" : "}")
      buffer
    end
  end
end
@@ -0,0 +1,139 @@
module Ferret::Search
  # A Query that matches documents whose terms in a field fall within a
  # range. Either end of the range may be inclusive or exclusive. A RangeQuery
  # is built by QueryParser for input like +[010 120]+.
  class RangeQuery < Query
    include Ferret::Index

    # The field the range applies to, its two bounds, and whether each bound
    # is itself part of the range.
    attr_reader :field, :lower_term, :upper_term, :include_lower, :include_upper

    # Constructs a query selecting all terms greater than
    # +lower_term+ but less than +upper_term+.
    # There must be at least one term and either term may be nil,
    # in which case there is no bound on that side, but if there are
    # two terms, both terms *must* be for the same field.
    #
    # field::         The field this range applies to
    # lower_term::    The lower bound on this range
    # upper_term::    The upper bound on this range
    # include_lower:: Does this range include the lower bound?
    # include_upper:: Does this range include the upper bound?
    #
    # Raises ArgumentError when both bounds are nil, when a nil bound is
    # marked inclusive, or when +upper_term+ is less than +lower_term+.
    def initialize(field, lower_term, upper_term, include_lower, include_upper)
      super()
      @field = field
      @lower_term = lower_term
      @upper_term = upper_term
      @include_lower = include_lower
      @include_upper = include_upper

      if lower_term.nil? && upper_term.nil?
        raise ArgumentError, "At least one value must be non-nil"
      end
      if include_lower && lower_term.nil?
        raise ArgumentError, "The lower bound must be non-nil to be inclusive"
      end
      if include_upper && upper_term.nil?
        raise ArgumentError, "The upper bound must be non-nil to be inclusive"
      end
      if upper_term && lower_term && upper_term < lower_term
        raise ArgumentError, "The lower bound must less than the upper bound"
      end
    end

    # Constructs a query for field +field+ matching less than or equal to
    # +upper_term+. Pass +include_upper+ as false to exclude +upper_term+
    # itself.
    def self.new_less(field, upper_term, include_upper = true)
      RangeQuery.new(field, nil, upper_term, false, include_upper)
    end

    # Constructs a query for field +field+ matching greater than or equal
    # to +lower_term+. Pass +include_lower+ as false to exclude +lower_term+
    # itself.
    def self.new_more(field, lower_term, include_lower = true)
      RangeQuery.new(field, lower_term, nil, include_lower, false)
    end

    # Expands this query into a BooleanQuery containing one SHOULD TermQuery
    # per term of the field that lies within the range. Terms are enumerated
    # from +reader+ starting at the lower bound (or at the empty string when
    # there is no lower bound). Each generated TermQuery inherits this query's
    # boost.
    def rewrite(reader)
      bq = BooleanQuery.new(true)
      term_enum = reader.terms_from(Term.new(@field, @lower_term || ""))

      begin
        # stays true until the first term strictly above an exclusive lower
        # bound has been seen
        check_lower = !@include_lower
        # do-while: the enumerator's current term is examined before the
        # first call to next?
        begin
          term = term_enum.term

          break if term.nil? || term.field != @field
          if !check_lower || @lower_term.nil? || term.text > @lower_term
            check_lower = false
            if @upper_term
              compare = @upper_term <=> term.text

              # if beyond the upper term, or is exclusive and
              # this is equal to the upper term, break out
              break if compare < 0 || (!@include_upper && compare == 0)
            end
            tq = TermQuery.new(term)                       # found a match
            tq.boost = boost()                             # set the boost
            bq.add_query(tq, BooleanClause::Occur::SHOULD) # add to query
          end
        end while term_enum.next?
      ensure
        term_enum.close
      end
      bq
    end

    # Prints a user-readable version of this query.
    #
    # f:: the default field; the field prefix is omitted from the output when
    #     the query's field equals +f+
    #
    # An inclusive bound is written with a square bracket, an exclusive bound
    # with a curly brace and a missing bound as "|". The boost is appended
    # as "^boost" when it is not 1.0.
    def to_s(f = nil)
      buffer = ""
      buffer << "#{@field}:" if field() != f

      if @lower_term
        buffer << (@include_lower ? "[" : "{")
        buffer << @lower_term
      else
        buffer << "|"
      end

      buffer << " " if @upper_term && @lower_term

      if @upper_term
        buffer << @upper_term
        buffer << (@include_upper ? "]" : "}")
      else
        buffer << "|"
      end

      buffer << "^#{boost()}" if boost() != 1.0
      buffer
    end

    # Returns true iff +o+ is equal to this: +o+ must be exactly a RangeQuery
    # with the same boost, field, bounds and inclusiveness flags.
    def eql?(o)
      o.instance_of?(RangeQuery) &&
        boost() == o.boost() &&
        @include_upper == o.include_upper &&
        @include_lower == o.include_lower &&
        @upper_term == o.upper_term &&
        @lower_term == o.lower_term &&
        @field == o.field
    end
    alias :== :eql?

    # Returns a hash code value for this object, computed from the same
    # attributes that #eql? compares. The attributes are hashed together as
    # an Array rather than XOR-ed, because XOR makes equal components (same
    # lower and upper term, or equal include flags) cancel each other out.
    def hash
      [boost(), @field, @lower_term, @upper_term, @include_lower, @include_upper].hash
    end
  end
end