ferret 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README +109 -0
- data/Rakefile +275 -0
- data/TODO +9 -0
- data/TUTORIAL +197 -0
- data/ext/extconf.rb +3 -0
- data/ext/ferret.c +23 -0
- data/ext/ferret.h +85 -0
- data/ext/index_io.c +543 -0
- data/ext/priority_queue.c +227 -0
- data/ext/ram_directory.c +316 -0
- data/ext/segment_merge_queue.c +41 -0
- data/ext/string_helper.c +42 -0
- data/ext/tags +240 -0
- data/ext/term.c +261 -0
- data/ext/term_buffer.c +299 -0
- data/ext/util.c +12 -0
- data/lib/ferret.rb +41 -0
- data/lib/ferret/analysis.rb +11 -0
- data/lib/ferret/analysis/analyzers.rb +93 -0
- data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
- data/lib/ferret/analysis/token.rb +79 -0
- data/lib/ferret/analysis/token_filters.rb +86 -0
- data/lib/ferret/analysis/token_stream.rb +26 -0
- data/lib/ferret/analysis/tokenizers.rb +107 -0
- data/lib/ferret/analysis/word_list_loader.rb +27 -0
- data/lib/ferret/document.rb +2 -0
- data/lib/ferret/document/document.rb +152 -0
- data/lib/ferret/document/field.rb +304 -0
- data/lib/ferret/index.rb +26 -0
- data/lib/ferret/index/compound_file_io.rb +343 -0
- data/lib/ferret/index/document_writer.rb +288 -0
- data/lib/ferret/index/field_infos.rb +259 -0
- data/lib/ferret/index/fields_io.rb +175 -0
- data/lib/ferret/index/index.rb +228 -0
- data/lib/ferret/index/index_file_names.rb +33 -0
- data/lib/ferret/index/index_reader.rb +462 -0
- data/lib/ferret/index/index_writer.rb +488 -0
- data/lib/ferret/index/multi_reader.rb +363 -0
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
- data/lib/ferret/index/segment_infos.rb +130 -0
- data/lib/ferret/index/segment_merge_info.rb +47 -0
- data/lib/ferret/index/segment_merge_queue.rb +16 -0
- data/lib/ferret/index/segment_merger.rb +337 -0
- data/lib/ferret/index/segment_reader.rb +380 -0
- data/lib/ferret/index/segment_term_enum.rb +178 -0
- data/lib/ferret/index/segment_term_vector.rb +58 -0
- data/lib/ferret/index/term.rb +49 -0
- data/lib/ferret/index/term_buffer.rb +88 -0
- data/lib/ferret/index/term_doc_enum.rb +283 -0
- data/lib/ferret/index/term_enum.rb +52 -0
- data/lib/ferret/index/term_info.rb +41 -0
- data/lib/ferret/index/term_infos_io.rb +312 -0
- data/lib/ferret/index/term_vector_offset_info.rb +20 -0
- data/lib/ferret/index/term_vectors_io.rb +552 -0
- data/lib/ferret/query_parser.rb +274 -0
- data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
- data/lib/ferret/search.rb +49 -0
- data/lib/ferret/search/boolean_clause.rb +100 -0
- data/lib/ferret/search/boolean_query.rb +303 -0
- data/lib/ferret/search/boolean_scorer.rb +294 -0
- data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
- data/lib/ferret/search/conjunction_scorer.rb +99 -0
- data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
- data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
- data/lib/ferret/search/explanation.rb +41 -0
- data/lib/ferret/search/field_cache.rb +216 -0
- data/lib/ferret/search/field_doc.rb +31 -0
- data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
- data/lib/ferret/search/filter.rb +11 -0
- data/lib/ferret/search/filtered_query.rb +130 -0
- data/lib/ferret/search/filtered_term_enum.rb +79 -0
- data/lib/ferret/search/fuzzy_query.rb +153 -0
- data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
- data/lib/ferret/search/hit_collector.rb +34 -0
- data/lib/ferret/search/hit_queue.rb +11 -0
- data/lib/ferret/search/index_searcher.rb +173 -0
- data/lib/ferret/search/match_all_docs_query.rb +104 -0
- data/lib/ferret/search/multi_phrase_query.rb +204 -0
- data/lib/ferret/search/multi_term_query.rb +65 -0
- data/lib/ferret/search/non_matching_scorer.rb +22 -0
- data/lib/ferret/search/phrase_positions.rb +55 -0
- data/lib/ferret/search/phrase_query.rb +217 -0
- data/lib/ferret/search/phrase_scorer.rb +153 -0
- data/lib/ferret/search/prefix_query.rb +47 -0
- data/lib/ferret/search/query.rb +111 -0
- data/lib/ferret/search/query_filter.rb +51 -0
- data/lib/ferret/search/range_filter.rb +103 -0
- data/lib/ferret/search/range_query.rb +139 -0
- data/lib/ferret/search/req_excl_scorer.rb +125 -0
- data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
- data/lib/ferret/search/score_doc.rb +38 -0
- data/lib/ferret/search/score_doc_comparator.rb +114 -0
- data/lib/ferret/search/scorer.rb +91 -0
- data/lib/ferret/search/similarity.rb +278 -0
- data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
- data/lib/ferret/search/sort.rb +105 -0
- data/lib/ferret/search/sort_comparator.rb +60 -0
- data/lib/ferret/search/sort_field.rb +87 -0
- data/lib/ferret/search/spans.rb +12 -0
- data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
- data/lib/ferret/search/spans/span_first_query.rb +79 -0
- data/lib/ferret/search/spans/span_near_query.rb +108 -0
- data/lib/ferret/search/spans/span_not_query.rb +130 -0
- data/lib/ferret/search/spans/span_or_query.rb +176 -0
- data/lib/ferret/search/spans/span_query.rb +25 -0
- data/lib/ferret/search/spans/span_scorer.rb +74 -0
- data/lib/ferret/search/spans/span_term_query.rb +105 -0
- data/lib/ferret/search/spans/span_weight.rb +84 -0
- data/lib/ferret/search/spans/spans_enum.rb +44 -0
- data/lib/ferret/search/term_query.rb +128 -0
- data/lib/ferret/search/term_scorer.rb +181 -0
- data/lib/ferret/search/top_docs.rb +24 -0
- data/lib/ferret/search/top_field_docs.rb +17 -0
- data/lib/ferret/search/weight.rb +54 -0
- data/lib/ferret/search/wildcard_query.rb +26 -0
- data/lib/ferret/search/wildcard_term_enum.rb +61 -0
- data/lib/ferret/stemmers.rb +1 -0
- data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
- data/lib/ferret/store.rb +5 -0
- data/lib/ferret/store/buffered_index_io.rb +191 -0
- data/lib/ferret/store/directory.rb +139 -0
- data/lib/ferret/store/fs_store.rb +338 -0
- data/lib/ferret/store/index_io.rb +259 -0
- data/lib/ferret/store/ram_store.rb +282 -0
- data/lib/ferret/utils.rb +7 -0
- data/lib/ferret/utils/bit_vector.rb +105 -0
- data/lib/ferret/utils/date_tools.rb +138 -0
- data/lib/ferret/utils/number_tools.rb +91 -0
- data/lib/ferret/utils/parameter.rb +41 -0
- data/lib/ferret/utils/priority_queue.rb +120 -0
- data/lib/ferret/utils/string_helper.rb +47 -0
- data/lib/ferret/utils/weak_key_hash.rb +51 -0
- data/rake_utils/code_statistics.rb +106 -0
- data/setup.rb +1551 -0
- data/test/benchmark/tb_ram_store.rb +76 -0
- data/test/benchmark/tb_rw_vint.rb +26 -0
- data/test/longrunning/tc_numbertools.rb +60 -0
- data/test/longrunning/tm_store.rb +19 -0
- data/test/test_all.rb +9 -0
- data/test/test_helper.rb +6 -0
- data/test/unit/analysis/tc_analyzer.rb +21 -0
- data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
- data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
- data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
- data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
- data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
- data/test/unit/analysis/tc_stop_filter.rb +14 -0
- data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
- data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_word_list_loader.rb +32 -0
- data/test/unit/document/tc_document.rb +47 -0
- data/test/unit/document/tc_field.rb +80 -0
- data/test/unit/index/tc_compound_file_io.rb +107 -0
- data/test/unit/index/tc_field_infos.rb +119 -0
- data/test/unit/index/tc_fields_io.rb +167 -0
- data/test/unit/index/tc_index.rb +140 -0
- data/test/unit/index/tc_index_reader.rb +622 -0
- data/test/unit/index/tc_index_writer.rb +57 -0
- data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
- data/test/unit/index/tc_segment_infos.rb +74 -0
- data/test/unit/index/tc_segment_term_docs.rb +17 -0
- data/test/unit/index/tc_segment_term_enum.rb +60 -0
- data/test/unit/index/tc_segment_term_vector.rb +71 -0
- data/test/unit/index/tc_term.rb +22 -0
- data/test/unit/index/tc_term_buffer.rb +57 -0
- data/test/unit/index/tc_term_info.rb +19 -0
- data/test/unit/index/tc_term_infos_io.rb +192 -0
- data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
- data/test/unit/index/tc_term_vectors_io.rb +108 -0
- data/test/unit/index/th_doc.rb +244 -0
- data/test/unit/query_parser/tc_query_parser.rb +84 -0
- data/test/unit/search/tc_filter.rb +113 -0
- data/test/unit/search/tc_fuzzy_query.rb +136 -0
- data/test/unit/search/tc_index_searcher.rb +188 -0
- data/test/unit/search/tc_search_and_sort.rb +98 -0
- data/test/unit/search/tc_similarity.rb +37 -0
- data/test/unit/search/tc_sort.rb +48 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +153 -0
- data/test/unit/store/tc_fs_store.rb +84 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +180 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/ts_analysis.rb +16 -0
- data/test/unit/ts_document.rb +4 -0
- data/test/unit/ts_index.rb +18 -0
- data/test/unit/ts_query_parser.rb +3 -0
- data/test/unit/ts_search.rb +10 -0
- data/test/unit/ts_store.rb +6 -0
- data/test/unit/ts_utils.rb +10 -0
- data/test/unit/utils/tc_bit_vector.rb +65 -0
- data/test/unit/utils/tc_date_tools.rb +50 -0
- data/test/unit/utils/tc_number_tools.rb +59 -0
- data/test/unit/utils/tc_parameter.rb +40 -0
- data/test/unit/utils/tc_priority_queue.rb +62 -0
- data/test/unit/utils/tc_string_helper.rb +21 -0
- data/test/unit/utils/tc_weak_key_hash.rb +25 -0
- metadata +251 -0
@@ -0,0 +1,47 @@
|
|
1
|
+
module Ferret::Search
|
2
|
+
# A Query that matches documents containing terms with a specified prefix. A
# PrefixQuery is built by QueryParser for input like +app*+.
class PrefixQuery < Query
  # The term object handed to #initialize; its +text+ is the prefix and its
  # +field+ is the field that is searched.
  attr_reader :prefix

  # Constructs a query for terms starting with +prefix+.
  #
  # prefix:: an object responding to +field+ and +text+ (both are read by
  #          #rewrite and #to_s)
  def initialize(prefix)
    super()
    @prefix = prefix
  end

  # Expands this query into a BooleanQuery containing one optional (SHOULD)
  # TermQuery for each matching term. Every TermQuery gets this query's boost.
  #
  # Terms are read from <tt>reader.terms_from(@prefix)</tt> until the
  # enumerator is exhausted, or until a term is reached that is nil, belongs
  # to a different field, or whose text does not start with the prefix text.
  #
  # reader:: must respond to +terms_from+; the enumerator it returns is
  #          closed before this method returns, even if an error is raised
  #
  # Returns the BooleanQuery (empty when nothing matched).
  def rewrite(reader)
    # NOTE(review): the +true+ argument is presumably the coord-disabled flag
    # (Query#merge_boolean_queries passes coord_disabled in this position).
    bq = BooleanQuery.new(true)
    enumerator = reader.terms_from(@prefix)
    begin
      prefix_text = @prefix.text
      prefix_length = prefix_text.length
      prefix_field = @prefix.field
      begin
        term = enumerator.term
        # Stop at the first term that can no longer match the prefix.
        if term.nil? ||
           term.field != prefix_field ||
           term.text[0, prefix_length] != prefix_text
          break
        end
        tq = TermQuery.new(term)                       # found a match
        tq.boost = boost                               # set the boost
        bq.add_query(tq, BooleanClause::Occur::SHOULD) # add to query
      end while enumerator.next?
    ensure
      enumerator.close
    end
    bq
  end

  # Prints a user-readable version of this query.
  #
  # f:: the default field; the field name is only printed when the prefix's
  #     field differs from it. It defaults to nil so that a bare +to_s+ and
  #     string interpolation work, matching the signature of Query#to_s.
  #
  # The boost is appended as <tt>^boost</tt> when it is not 1.0.
  def to_s(f = nil)
    buffer = ""
    buffer << "#{@prefix.field}:" if @prefix.field != f
    buffer << "#{@prefix.text}*"
    buffer << "^#{boost}" if boost != 1.0
    buffer
  end
end
|
47
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
module Ferret::Search
|
2
|
+
# The abstract base class for queries.
# Instantiable subclasses are:
# * TermQuery
# * MultiTermQuery
# * BooleanQuery
# * WildcardQuery
# * PhraseQuery
# * PrefixQuery
# * MultiPhraseQuery
# * FuzzyQuery
# * RangeQuery
# * Span::SpanQuery
#
# A parser for queries is contained in:
# * Ferret::QueryParser::QueryParser
#
class Query
  # Documents matching this query clause will (in addition to the normal
  # weightings) have their score multiplied by the boost factor. It is
  # 1.0 by default.
  attr_accessor :boost

  # Sets the boost to its default of 1.0.
  def initialize
    @boost = 1.0
  end

  # Prints a query to a string, with +field+ as the default field for
  # terms. The representation used is one that is supposed to be readable
  # by Ferret::QueryParser::QueryParser. However, there are the following
  # limitations:
  # * If the query was created by the parser, the printed representation
  #   may not be exactly what was parsed. For example, characters that need
  #   to be escaped will be represented without the required backslash.
  # * Some of the more complicated queries (e.g. span queries)
  #   don't have a representation that can be parsed by QueryParser.
  #
  # The base implementation always raises NotImplementedError.
  def to_s(field = nil)
    raise NotImplementedError
  end

  # Expert: Constructs an appropriate Weight implementation for this query.
  #
  # Only implemented by primitive queries, which re-write to themselves.
  # The base implementation always raises NotImplementedError.
  def create_weight(searcher)
    raise NotImplementedError
  end

  # Expert: Constructs and initializes a Weight for a top-level query.
  #
  # The query is first rewritten by the searcher; the weight created from
  # the rewritten query is then normalized with the query norm computed
  # from its sum of squared weights.
  def weight(searcher)
    rewritten = searcher.rewrite(self)
    result = rewritten.create_weight(searcher)
    squared_sum = result.sum_of_squared_weights
    result.normalize(similarity(searcher).query_norm(squared_sum))
    result
  end

  # Expert: called to re-write queries into primitive queries. The base
  # implementation returns the query itself.
  def rewrite(reader)
    self
  end

  # Expert: called when re-writing queries under MultiSearcher.
  #
  # Only implemented by derived queries, with no #create_weight()
  # implementation. The base implementation returns self, and raises
  # ArgumentError as soon as one of +queries+ is not equal to this query.
  def combine(queries)
    queries.each do |other|
      raise ArgumentError if self != other
    end
    self
  end

  # Expert: adds all terms occurring in this query to the terms set. The
  # base implementation always raises NotImplementedError.
  def extract_terms(terms)
    raise NotImplementedError
  end

  # Expert: merges the clauses of a set of BooleanQuery's into a single
  # BooleanQuery.
  #
  # A utility for use by #combine() implementations. Duplicate clauses are
  # collapsed through a Set, and the result takes its coord-disabled flag
  # from the first query (false when +queries+ is empty).
  def merge_boolean_queries(queries)
    unique_clauses = Set.new
    queries.each { |query| unique_clauses.merge(query.clauses) }

    coord_disabled = queries.size.zero? ? false : queries[0].coord_disabled?
    merged = BooleanQuery.new(coord_disabled)
    unique_clauses.each { |clause| merged << clause }
    merged
  end

  # Expert: Returns the Similarity implementation to be used for this
  # query. Subclasses may override this method to specify their own
  # Similarity implementation, perhaps one that delegates through that of
  # the Searcher. By default the Searcher's Similarity implementation is
  # returned.
  def similarity(searcher)
    searcher.similarity
  end
end
|
111
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Ferret::Search
|
2
|
+
require 'monitor'
|
3
|
+
# Constrains search results to only match those which also match a provided
# query. Results are cached, so that searches after the first on the same
# index using this filter are much faster.
#
# This could be used, for example, with a RangeQuery on a suitably formatted
# date field to implement date filtering. One could re-use a single
# QueryFilter that matches, e.g., only documents modified within the last
# week. The QueryFilter and RangeQuery would only need to be reconstructed
# once per day.
class QueryFilter < Filter
  # Constructs a filter which only matches documents matching
  # +query+. The result cache is created lazily on the first call to #bits.
  def initialize(query)
    @query = query
    @cache = nil
  end

  # Returns a BitVector with a bit set for every document of +reader+ that
  # matches the query.
  #
  # The result is stored in a cache keyed by +reader+ and returned directly
  # on later calls with the same reader. Cache reads and writes are wrapped
  # in the cache's monitor; the search itself runs outside of it.
  def bits(reader)
    # NOTE(review): WeakKeyHash presumably lets an entry go away once its
    # reader is garbage collected -- confirm against Ferret::Utils.
    @cache ||= Ferret::Utils::WeakKeyHash.new.extend(MonitorMixin)

    cached = @cache.synchronize { @cache[reader] } # check cache
    return cached if cached

    doc_bits = Ferret::Utils::BitVector.new
    IndexSearcher.new(reader).search_each(@query) do |doc, _score|
      doc_bits.set(doc) # set bit for hit
    end

    @cache.synchronize { @cache[reader] = doc_bits } # update cache
    doc_bits
  end

  # Returns "QueryFilter(...)" wrapping the query's string form.
  def to_s
    "QueryFilter(#{@query})"
  end
end
|
51
|
+
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module Ferret::Search
|
2
|
+
# A Filter that restricts search results to a range of values in a given
# field.
#
# This code borrows heavily from RangeQuery, but is implemented as a Filter.
class RangeFilter < Filter
  include Ferret::Index

  # field_name::    The field this range applies to
  # lower_term::    The lower bound on this range (nil for no lower bound)
  # upper_term::    The upper bound on this range (nil for no upper bound)
  # include_lower:: Does this range include the lower bound?
  # include_upper:: Does this range include the upper bound?
  #
  # Raises ArgumentError when both bounds are nil, when an inclusive bound
  # is nil, or when +upper_term+ is less than +lower_term+.
  def initialize(field_name, lower_term, upper_term, include_lower, include_upper)
    @field_name = field_name
    @lower_term = lower_term
    @upper_term = upper_term
    @include_lower = include_lower
    @include_upper = include_upper

    if lower_term.nil? && upper_term.nil?
      raise ArgumentError, "At least one value must be non-nil"
    end
    if include_lower && lower_term.nil?
      raise ArgumentError, "The lower bound must be non-nil to be inclusive"
    end
    if include_upper && upper_term.nil?
      raise ArgumentError, "The upper bound must be non-nil to be inclusive"
    end
    if upper_term && lower_term && upper_term < lower_term
      raise ArgumentError, "The lower bound must less than the upper bound"
    end
  end

  # Constructs a filter for field +field_name+ matching less than or equal to
  # +upper_term+ (strictly less than when +include_upper+ is false).
  def RangeFilter.new_less(field_name, upper_term, include_upper = true)
    RangeFilter.new(field_name, nil, upper_term, false, include_upper)
  end

  # Constructs a filter for field +field_name+ matching greater than or equal
  # to +lower_term+ (strictly greater than when +include_lower+ is false).
  def RangeFilter.new_more(field_name, lower_term, include_lower = true)
    RangeFilter.new(field_name, lower_term, nil, include_lower, false)
  end

  # Returns a BitVector with true for documents which should be permitted in
  # search results, and false for those that should not.
  #
  # Terms are enumerated starting from the lower bound (or from the empty
  # string when there is none). Enumeration stops when the terms run out,
  # when a term of another field is reached, or when a term lies beyond the
  # upper bound. Both the term enumerator and the term-docs object are
  # closed before returning.
  def bits(reader)
    bits = Ferret::Utils::BitVector.new
    term_enum = reader.terms_from(Term.new(@field_name, @lower_term || ""))

    begin
      return bits if term_enum.term.nil?

      # With an exclusive lower bound, terms are skipped until one is
      # strictly greater than the bound; after that the check is turned off.
      check_lower = !@include_lower

      term_docs = reader.term_docs
      begin
        begin
          term = term_enum.term
          break if term.nil? || term.field != @field_name

          if !check_lower || @lower_term.nil? || term.text > @lower_term
            check_lower = false
            if @upper_term
              compare = @upper_term <=> term.text
              # if beyond the upper term, or is exclusive and
              # this is equal to the upper term, break out
              break if compare < 0 || (!@include_upper && compare == 0)
            end

            # we have a good term, find the docs
            term_docs.seek(term_enum)
            bits.set(term_docs.doc) while term_docs.next?
          end
        end while term_enum.next?
      ensure
        term_docs.close
      end
    ensure
      term_enum.close
    end

    bits
  end

  # Returns a readable form of the filter: <tt>field:[lower-upper]</tt>.
  # Square brackets mark an inclusive bound and curly braces an exclusive
  # one, as in RangeQuery#to_s; a missing bound is left empty.
  def to_s
    buffer = "#{@field_name}:"
    # The parentheses matter: << binds tighter than ?:, so without them the
    # boolean flag itself would be appended and String#<< would raise.
    buffer << (@include_lower ? "[" : "{")
    buffer << @lower_term if @lower_term
    buffer << "-"
    buffer << @upper_term if @upper_term
    buffer << (@include_upper ? "]" : "}")
    buffer
  end
end
|
103
|
+
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
module Ferret::Search
|
2
|
+
# A Query that matches documents whose terms fall within a range. Each bound
# can be inclusive or exclusive. A RangeQuery is built by QueryParser for
# input like +[010 120]+.
class RangeQuery < Query
  include Ferret::Index

  # Read-only accessors for the field name, both bounds, and the flags
  # telling whether each bound is inclusive.
  attr_reader :field, :lower_term, :upper_term, :include_lower, :include_upper

  # Constructs a query selecting all terms between +lower_term+ and
  # +upper_term+. At least one bound must be given; a nil bound means the
  # range is open on that side.
  #
  # field::         The field this range applies to
  # lower_term::    The lower bound on this range
  # upper_term::    The upper bound on this range
  # include_lower:: Does this range include the lower bound?
  # include_upper:: Does this range include the upper bound?
  #
  # Raises ArgumentError when both bounds are nil, when an inclusive bound
  # is nil, or when +upper_term+ is less than +lower_term+.
  def initialize(field, lower_term, upper_term, include_lower, include_upper)
    super()
    @field = field
    @lower_term = lower_term
    @upper_term = upper_term
    @include_lower = include_lower
    @include_upper = include_upper

    if lower_term.nil? && upper_term.nil?
      raise ArgumentError, "At least one value must be non-nil"
    end
    if include_lower && lower_term.nil?
      raise ArgumentError, "The lower bound must be non-nil to be inclusive"
    end
    if include_upper && upper_term.nil?
      raise ArgumentError, "The upper bound must be non-nil to be inclusive"
    end
    if upper_term && lower_term && upper_term < lower_term
      raise ArgumentError, "The lower bound must less than the upper bound"
    end
  end

  # Constructs a query for field +field+ matching less than or equal to
  # +upper_term+ (strictly less than when +include_upper+ is false).
  def RangeQuery.new_less(field, upper_term, include_upper = true)
    RangeQuery.new(field, nil, upper_term, false, include_upper)
  end

  # Constructs a query for field +field+ matching greater than or equal
  # to +lower_term+ (strictly greater than when +include_lower+ is false).
  def RangeQuery.new_more(field, lower_term, include_lower = true)
    RangeQuery.new(field, lower_term, nil, include_lower, false)
  end

  # Expands this query into a BooleanQuery containing one optional (SHOULD)
  # TermQuery, carrying this query's boost, for every term in the range.
  #
  # Terms are enumerated starting from the lower bound (or from the empty
  # string when there is none). Enumeration stops when the terms run out,
  # when a term of another field is reached, or when a term lies beyond the
  # upper bound. The term enumerator is closed before returning.
  def rewrite(reader)
    bq = BooleanQuery.new(true)
    term_enum = reader.terms_from(Term.new(@field, @lower_term || ""))

    begin
      # With an exclusive lower bound, terms are skipped until one is
      # strictly greater than the bound; after that the check is turned off.
      check_lower = !@include_lower
      begin
        term = term_enum.term

        break if term.nil? || term.field != @field
        if !check_lower || @lower_term.nil? || term.text > @lower_term
          check_lower = false
          if @upper_term
            compare = @upper_term <=> term.text

            # if beyond the upper term, or is exclusive and
            # this is equal to the upper term, break out
            break if compare < 0 || (!@include_upper && compare == 0)
          end
          tq = TermQuery.new(term)                       # found a match
          tq.boost = boost                               # set the boost
          bq.add_query(tq, BooleanClause::Occur::SHOULD) # add to query
        end
      end while term_enum.next?
    ensure
      term_enum.close
    end
    bq
  end

  # Prints a user-readable version of this query.
  #
  # f:: the default field; the field name is only printed when this query's
  #     field differs from it
  #
  # A present bound is wrapped in "[" / "]" when inclusive and "{" / "}"
  # when exclusive; a missing bound is printed as "|". The boost is appended
  # as <tt>^boost</tt> when it is not 1.0.
  def to_s(f = nil)
    buffer = ""
    buffer << "#{@field}:" if field != f

    if @lower_term
      buffer << (@include_lower ? "[" : "{")
      buffer << @lower_term
    else
      buffer << "|"
    end

    buffer << " " if @upper_term && @lower_term

    if @upper_term
      buffer << @upper_term
      buffer << (@include_upper ? "]" : "}")
    else
      buffer << "|"
    end

    buffer << "^#{boost}" if boost != 1.0
    buffer
  end

  # Returns true iff +o+ is equal to this: an instance of exactly RangeQuery
  # with the same boost, field, bounds and inclusiveness flags.
  def eql?(o)
    o.instance_of?(RangeQuery) &&
      boost == o.boost &&
      @include_upper == o.include_upper &&
      @include_lower == o.include_lower &&
      @upper_term == o.upper_term &&
      @lower_term == o.lower_term &&
      @field == o.field
  end
  alias :== :eql?

  # Returns a hash code value for this object, combining the hashes of the
  # same attributes that #eql? compares.
  def hash
    boost.hash ^
      @field.hash ^
      @lower_term.hash ^
      @upper_term.hash ^
      @include_lower.hash ^
      @include_upper.hash
  end
end
|
139
|
+
end
|