ferret 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,47 @@
1
module Ferret::Search
  # A Query that matches documents containing terms with a specified prefix. A
  # PrefixQuery is built by QueryParser for input like +app*+.
  class PrefixQuery < Query
    # The prefix term passed to the constructor. #rewrite and #to_s read its
    # +field+ and +text+.
    attr_reader :prefix

    # Constructs a query for terms starting with +prefix+.
    #
    # prefix:: the term whose +field+ selects the field to scan and whose
    #          +text+ is the leading string every matching term must share
    def initialize(prefix)
      super()
      @prefix = prefix
    end

    # Expands this query into a BooleanQuery holding one SHOULD TermQuery
    # clause for every term that matches the prefix.
    #
    # Enumeration starts at +reader.terms_from(@prefix)+ and stops at the
    # first term that is nil, belongs to a different field, or whose text
    # does not begin with the prefix text. Every generated TermQuery gets
    # this query's boost. The term enumerator is closed even if an error is
    # raised part-way through.
    #
    # reader:: the index reader supplying the term enumeration
    def rewrite(reader)
      bq = BooleanQuery.new(true)
      enumerator = reader.terms_from(@prefix)
      begin
        prefix_text = @prefix.text
        prefix_length = prefix_text.length
        prefix_field = @prefix.field
        # The enumerator is already positioned on its first term, so the
        # current term is examined before advancing.
        loop do
          term = enumerator.term
          if term.nil? ||
             term.field != prefix_field ||
             term.text[0, prefix_length] != prefix_text
            break
          end
          tq = TermQuery.new(term)                        # found a match
          tq.boost = boost()                              # set the boost
          bq.add_query(tq, BooleanClause::Occur::SHOULD)  # add to query
          break unless enumerator.next?
        end
      ensure
        enumerator.close()
      end
      return bq
    end

    # Prints a user-readable version of this query, e.g. +field:app*^2.0+.
    #
    # f:: the default field; the "field:" part is omitted when the prefix's
    #     field equals +f+. It defaults to nil, matching Query#to_s, so the
    #     query can be interpolated into a string or printed without an
    #     argument.
    def to_s(f=nil)
      buffer = ""
      buffer << "#{@prefix.field}:" if @prefix.field != f
      buffer << "#{@prefix.text}*"
      buffer << "^#{boost()}" if boost() != 1.0
      return buffer
    end

  end
end
@@ -0,0 +1,111 @@
1
module Ferret::Search
  # Abstract superclass shared by all query types.
  #
  # Instantiable subclasses are:
  # * TermQuery
  # * MultiTermQuery
  # * BooleanQuery
  # * WildcardQuery
  # * PhraseQuery
  # * PrefixQuery
  # * MultiPhraseQuery
  # * FuzzyQuery
  # * RangeQuery
  # * Span::SpanQuery
  #
  # A parser for queries is contained in:
  # * Ferret::QueryParser::QueryParser
  #
  class Query
    # Boost factor for this query clause. Documents matching the clause have
    # their score multiplied by it (on top of the normal weightings). It is
    # 1.0 by default.
    attr_accessor :boost

    # Starts every query off with the neutral boost of 1.0.
    def initialize()
      @boost = 1.0
    end

    # Renders the query as a string, treating +field+ as the default field
    # for terms. The output is meant to be readable by
    # Ferret::QueryParser::QueryParser, with these limitations:
    # * A query created by the parser may not print exactly as it was
    #   written. For example, characters that need escaping are printed
    #   without the required backslash.
    # * Some of the more complicated queries (e.g. span queries) have no
    #   representation that QueryParser can parse.
    #
    # This base implementation always raises NotImplementedError.
    def to_s(field=nil)
      raise NotImplementedError
    end

    # Expert: Builds the Weight implementation appropriate for this query.
    #
    # Only implemented by primitive queries, which re-write to themselves.
    # This base implementation always raises NotImplementedError.
    def create_weight(searcher)
      raise NotImplementedError
    end

    # Expert: Builds and normalizes a Weight for a top-level query.
    #
    # The query is first rewritten through +searcher+. The resulting weight
    # is then normalized by the query norm that the similarity computes from
    # the weight's sum of squared weights.
    def weight(searcher)
      rewritten = searcher.rewrite(self)
      result = rewritten.create_weight(searcher)
      squared_sum = result.sum_of_squared_weights()
      query_norm = similarity(searcher).query_norm(squared_sum)
      result.normalize(query_norm)
      return result
    end

    # Expert: called to re-write queries into primitive queries. The default
    # is to return the query itself.
    def rewrite(reader)
      return self
    end

    # Expert: called when re-writing queries under MultiSearcher.
    #
    # Only implemented by derived queries, with no #create_weight()
    # implementation. This default raises ArgumentError unless every element
    # of +queries+ is equal to this query, and otherwise returns self.
    def combine(queries)
      queries.each { |other| raise ArgumentError if self != other }
      return self
    end

    # Expert: adds all terms occurring in this query to the +terms+ set.
    # This base implementation always raises NotImplementedError.
    def extract_terms(terms)
      raise NotImplementedError
    end


    # Expert: merges the clauses of a set of BooleanQuery's into a single
    # BooleanQuery.
    #
    # Clauses are gathered into a Set before being added to the result. The
    # result takes its coord_disabled setting from the first query, or false
    # when +queries+ is empty.
    #
    # A utility for use by #combine() implementations.
    def merge_boolean_queries(queries)
      clause_set = Set.new
      queries.each do |boolean_query|
        boolean_query.clauses.each { |clause| clause_set << clause }
      end

      coord_disabled = (queries.size != 0) && queries[0].coord_disabled?
      merged = BooleanQuery.new(coord_disabled)
      clause_set.each { |clause| merged << clause }
      return merged
    end

    # Expert: Returns the Similarity implementation to be used for this
    # query. Subclasses may override this method to supply their own
    # Similarity, perhaps one that delegates through that of the Searcher.
    # By default the Searcher's Similarity implementation is returned.
    def similarity(searcher)
      return searcher.similarity
    end
  end
end
@@ -0,0 +1,51 @@
1
module Ferret::Search
  require 'monitor'
  # Restricts search results to documents that also match a given query.
  # The computed bits are cached per reader, so later searches on the same
  # index through this filter are much faster than the first.
  #
  # This could be used, for example, with a RangeQuery on a suitably formatted
  # date field to implement date filtering. A single QueryFilter that matches,
  # e.g., only documents modified within the last week could be re-used; the
  # QueryFilter and RangeQuery would only need to be reconstructed once per
  # day.
  class QueryFilter < Filter

    # Constructs a filter which only matches documents matching
    # +query+. The cache is created on the first call to #bits.
    def initialize(query)
      @query = query
      @cache = nil
    end

    # Returns the BitVector of documents in +reader+ that match the query.
    #
    # A previously computed vector for the same reader is returned from the
    # cache. Otherwise the query is run through a new IndexSearcher on
    # +reader+, a bit is set for every hit, and the vector is cached.
    # Cache reads and writes are wrapped in the cache's monitor.
    def bits(reader)
      @cache ||= Ferret::Utils::WeakKeyHash.new.extend(MonitorMixin)

      # check cache
      cached = @cache.synchronize() { @cache[reader] }
      return cached if cached

      hits = Ferret::Utils::BitVector.new()

      IndexSearcher.new(reader).search_each(@query) do |doc, score|
        hits.set(doc)  # set bit for hit
      end

      # update cache
      @cache.synchronize() { @cache[reader] = hits }

      return hits
    end

    # Returns a string of the form "QueryFilter(<query>)".
    def to_s()
      return "QueryFilter(#{@query})"
    end
  end
end
@@ -0,0 +1,103 @@
1
module Ferret::Search
  # A Filter that restricts search results to a range of values in a given
  # field.
  #
  # This code borrows heavily from RangeQuery, but is implemented as a Filter.
  class RangeFilter < Filter
    include Ferret::Index

    # Builds a filter over the terms of one field.
    #
    # field_name::    The field this range applies to
    # lower_term::    The lower bound on this range
    # upper_term::    The upper bound on this range
    # include_lower:: Does this range include the lower bound?
    # include_upper:: Does this range include the upper bound?
    #
    # Raises ArgumentError when both bounds are nil, when a nil bound is
    # flagged as inclusive, or when +upper_term+ sorts before +lower_term+.
    def initialize(field_name, lower_term, upper_term, include_lower, include_upper)
      @field_name = field_name
      @lower_term = lower_term
      @upper_term = upper_term
      @include_lower = include_lower
      @include_upper = include_upper

      if lower_term.nil? && upper_term.nil?
        raise ArgumentError, "At least one value must be non-nil"
      end
      if include_lower && lower_term.nil?
        raise ArgumentError, "The lower bound must be non-nil to be inclusive"
      end
      if include_upper && upper_term.nil?
        raise ArgumentError, "The upper bound must be non-nil to be inclusive"
      end
      if upper_term && lower_term && upper_term < lower_term
        raise ArgumentError, "The lower bound must less than the upper bound"
      end
    end

    # Constructs a filter for field +field_name+ matching less than or equal to
    # +upper_term+. Pass +include_upper+ as false to exclude +upper_term+
    # itself.
    def RangeFilter.new_less(field_name, upper_term, include_upper = true)
      return RangeFilter.new(field_name, nil, upper_term, false, include_upper)
    end

    # Constructs a filter for field +field_name+ matching greater than or equal
    # to +lower_term+. Pass +include_lower+ as false to exclude +lower_term+
    # itself.
    def RangeFilter.new_more(field_name, lower_term, include_lower = true)
      return RangeFilter.new(field_name, lower_term, nil, include_lower, false)
    end

    # Returns a BitVector with true for documents which should be permitted in
    # search results, and false for those that should not.
    #
    # Terms of the field are enumerated starting at the lower bound (or at
    # the empty string when there is none). For every term inside the range,
    # the bit of each document containing that term is set. Both the term
    # enumerator and the term-docs enumerator are closed before returning,
    # including when an error is raised.
    def bits(reader)
      bits = Ferret::Utils::BitVector.new()
      term_enum = reader.terms_from(Term.new(@field_name, @lower_term||""))

      begin
        # nothing at or after the starting term: no document can match
        return bits if term_enum.term().nil?

        # With an exclusive lower bound, terms are skipped until one sorts
        # strictly after it; from then on the check is switched off.
        check_lower = !@include_lower

        term_docs = reader.term_docs
        begin
          loop do
            term = term_enum.term()
            break if term.nil? || term.field != @field_name

            if !check_lower || @lower_term.nil? || term.text > @lower_term
              check_lower = false
              if @upper_term
                compare = @upper_term <=> term.text
                # if beyond the upper term, or is exclusive and
                # this is equal to the upper term, break out
                break if compare < 0 || (!@include_upper && compare == 0)
              end

              # we have a good term, find the docs
              term_docs.seek(term_enum)
              while term_docs.next?
                bits.set(term_docs.doc)
              end
            end

            break unless term_enum.next?
          end
        ensure
          term_docs.close()
        end
      ensure
        term_enum.close()
      end

      return bits
    end

    # Returns a string of the form "field:[lower-upper]". A square bracket
    # marks an inclusive bound and a curly bracket an exclusive one; a nil
    # bound is printed as nothing.
    def to_s()
      buffer = "#{@field_name}:"
      # The ternaries must be parenthesised: `buffer << cond ? a : b` parses
      # as `(buffer << cond) ? a : b` and tries to append the boolean itself.
      buffer << (@include_lower ? "[" : "{")
      buffer << @lower_term if @lower_term
      buffer << "-"
      buffer << @upper_term if @upper_term
      buffer << (@include_upper ? "]" : "}")
      return buffer
    end
  end
end
@@ -0,0 +1,139 @@
1
module Ferret::Search
  # A Query that matches documents whose terms in one field fall within a
  # range. Each bound can be inclusive or exclusive, and one of the two may
  # be left open. A RangeQuery is built by QueryParser for input like
  # +[010 120]+.
  class RangeQuery < Query
    include Ferret::Index

    # The field name, the two bounds, and whether each bound is inclusive,
    # exactly as passed to the constructor.
    attr_reader :field, :lower_term, :upper_term, :include_lower, :include_upper

    # Constructs a query selecting all terms greater than
    # +lower_term+ but less than +upper_term+.
    # At least one bound must be given; either may be nil, in which case
    # there is no bound on that side.
    #
    # field::         The field this range applies to
    # lower_term::    The lower bound on this range
    # upper_term::    The upper bound on this range
    # include_lower:: Does this range include the lower bound?
    # include_upper:: Does this range include the upper bound?
    #
    # Raises ArgumentError when both bounds are nil, when a nil bound is
    # flagged as inclusive, or when +upper_term+ sorts before +lower_term+.
    def initialize(field, lower_term, upper_term, include_lower, include_upper)
      super()
      @field = field
      @lower_term = lower_term
      @upper_term = upper_term
      @include_lower = include_lower
      @include_upper = include_upper

      if lower_term.nil? && upper_term.nil?
        raise ArgumentError, "At least one value must be non-nil"
      end
      if include_lower && lower_term.nil?
        raise ArgumentError, "The lower bound must be non-nil to be inclusive"
      end
      if include_upper && upper_term.nil?
        raise ArgumentError, "The upper bound must be non-nil to be inclusive"
      end
      if upper_term && lower_term && upper_term < lower_term
        raise ArgumentError, "The lower bound must less than the upper bound"
      end
    end

    # Constructs a query for field +field+ matching less than or equal to
    # +upper_term+. Pass +include_upper+ as false to exclude +upper_term+
    # itself.
    def RangeQuery.new_less(field, upper_term, include_upper = true)
      return RangeQuery.new(field, nil, upper_term, false, include_upper)
    end

    # Constructs a query for field +field+ matching greater than or equal
    # to +lower_term+. Pass +include_lower+ as false to exclude +lower_term+
    # itself.
    def RangeQuery.new_more(field, lower_term, include_lower = true)
      return RangeQuery.new(field, lower_term, nil, include_lower, false)
    end

    # Expands this query into a BooleanQuery holding one SHOULD TermQuery
    # clause, carrying this query's boost, for every term of the field that
    # lies inside the range.
    #
    # Terms are enumerated starting at the lower bound (or at the empty
    # string when there is none). The term enumerator is closed even if an
    # error is raised part-way through.
    def rewrite(reader)
      bq = BooleanQuery.new(true)
      term_enum = reader.terms_from(Term.new(@field, @lower_term||""))

      begin
        # With an exclusive lower bound, terms are skipped until one sorts
        # strictly after it; from then on the check is switched off.
        check_lower = !@include_lower
        loop do
          term = term_enum.term

          break if term.nil? || term.field != @field
          if !check_lower || @lower_term.nil? || term.text > @lower_term
            check_lower = false
            if @upper_term
              compare = @upper_term <=> term.text

              # if beyond the upper term, or is exclusive and
              # this is equal to the upper term, break out
              break if compare < 0 || (!@include_upper && compare == 0)
            end
            tq = TermQuery.new(term)                        # found a match
            tq.boost = boost()                              # set the boost
            bq.add_query(tq, BooleanClause::Occur::SHOULD)  # add to query
          end

          break unless term_enum.next?
        end
      ensure
        term_enum.close()
      end
      return bq
    end

    # Prints a user-readable version of this query.
    #
    # The "field:" part is omitted when the field equals +f+. A present bound
    # is wrapped in a square bracket when inclusive or a curly bracket when
    # exclusive; a missing bound is printed as "|". A boost other than 1.0 is
    # appended as "^boost".
    def to_s(f=nil)
      buffer = ""
      buffer << "#{@field}:" if field() != f

      if @lower_term
        buffer << (@include_lower ? "[" : "{")
        buffer << @lower_term
      else
        buffer << "|"
      end

      buffer << " " if @upper_term and @lower_term

      if @upper_term
        buffer << @upper_term
        buffer << (@include_upper ? "]" : "}")
      else
        buffer << "|"
      end

      if boost() != 1.0
        buffer << "^#{boost()}"
      end
      return buffer
    end

    # Returns true iff +o+ is a RangeQuery (exactly, not a subclass) with the
    # same boost, field, bounds and inclusiveness flags as this one.
    def eql?(o)
      return (o.instance_of?(RangeQuery) &&
              (boost() == o.boost()) &&
              (@include_upper == o.include_upper) &&
              (@include_lower == o.include_lower) &&
              (@upper_term == o.upper_term) &&
              (@lower_term == o.lower_term) &&
              (@field == o.field))
    end
    alias :== :eql?

    # Returns a hash code value for this object, built from the same
    # attributes that #eql? compares.
    def hash()
      return (boost().hash ^
              @field.hash ^
              @lower_term.hash ^
              @upper_term.hash ^
              @include_lower.hash ^
              @include_upper.hash)
    end
  end
end