ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
@@ -1,139 +0,0 @@
|
|
1
|
-
module Ferret::Search
|
2
|
-
# A Query that matches documents containing terms within a range of a single
# field. Either bound may be left open (nil) and each bound may independently
# be inclusive or exclusive. A RangeQuery is built by QueryParser for input
# like +[010 120]+.
class RangeQuery < Query
  include Ferret::Index

  # The field the range applies to, the two bounds (either may be nil) and
  # whether each bound is itself part of the range.
  attr_reader :field, :lower_term, :upper_term, :include_lower, :include_upper

  # Constructs a query selecting all terms greater than +lower_term+ but
  # less than +upper_term+. At least one bound must be given; a nil bound
  # leaves that side of the range open.
  #
  # field::         The field this range applies to
  # lower_term::    The lower bound on this range
  # upper_term::    The upper bound on this range
  # include_lower:: Does this range include the lower bound?
  # include_upper:: Does this range include the upper bound?
  #
  # Raises ArgumentError if both bounds are nil, if a nil bound is marked
  # inclusive, or if +upper_term+ sorts before +lower_term+.
  def initialize(field, lower_term, upper_term, include_lower, include_upper)
    super()
    @field = field
    @lower_term = lower_term
    @upper_term = upper_term
    @include_lower = include_lower
    @include_upper = include_upper

    if lower_term.nil? && upper_term.nil?
      raise ArgumentError, "At least one value must be non-nil"
    end
    if include_lower && lower_term.nil?
      raise ArgumentError, "The lower bound must be non-nil to be inclusive"
    end
    if include_upper && upper_term.nil?
      raise ArgumentError, "The upper bound must be non-nil to be inclusive"
    end
    if upper_term && lower_term && upper_term < lower_term
      raise ArgumentError, "The lower bound must be less than the upper bound"
    end
  end

  # Constructs a query for field +field+ matching terms less than (or, by
  # default, equal to) +upper_term+. The lower side is left open.
  def self.new_less(field, upper_term, include_upper = true)
    RangeQuery.new(field, nil, upper_term, false, include_upper)
  end

  # Constructs a query for field +field+ matching terms greater than (or, by
  # default, equal to) +lower_term+. The upper side is left open.
  def self.new_more(field, lower_term, include_lower = true)
    RangeQuery.new(field, lower_term, nil, include_lower, false)
  end

  # Expands the range into a BooleanQuery holding one SHOULD TermQuery clause
  # for every term of the field that falls inside the bounds. Each clause
  # carries this query's boost.
  #
  # reader::  the reader whose term dictionary is enumerated
  # returns:: the expanded BooleanQuery
  def rewrite(reader)
    bq = BooleanQuery.new(true)
    term_enum = reader.terms_from(Term.new(@field, @lower_term || ""))

    begin
      # The enumeration starts at the lower bound itself, so for an exclusive
      # lower bound the leading term(s) equal to it have to be skipped.
      check_lower = !@include_lower
      loop do
        term = term_enum.term

        # Stop once the enumeration is exhausted or has left our field.
        break if term.nil? || term.field != @field

        if !check_lower || @lower_term.nil? || term.text > @lower_term
          check_lower = false
          if @upper_term
            compare = @upper_term <=> term.text

            # if beyond the upper term, or is exclusive and
            # this is equal to the upper term, break out
            break if compare < 0 || (!@include_upper && compare == 0)
          end
          tq = TermQuery.new(term)                        # found a match
          tq.boost = boost()                              # set the boost
          bq.add_query(tq, BooleanClause::Occur::SHOULD)  # add to query
        end

        break unless term_enum.next?
      end
    ensure
      # Always release the enumerator, even if building the query failed.
      term_enum.close()
    end
    bq
  end

  # Prints a user-readable version of this query. The field prefix is
  # omitted when the query's field equals +f+. An open lower bound is
  # rendered as "<" and an open upper bound as ">".
  def to_s(f = nil)
    buffer = ""
    buffer << "#{@field}:" if field() != f

    if @lower_term
      buffer << (@include_lower ? "[" : "{")
      buffer << @lower_term
    else
      buffer << "<"
    end

    buffer << " " if @upper_term && @lower_term

    if @upper_term
      buffer << @upper_term
      buffer << (@include_upper ? "]" : "}")
    else
      buffer << ">"
    end

    buffer << "^#{boost()}" if boost() != 1.0
    buffer
  end

  # Returns true iff +o+ is a RangeQuery (exactly this class) with the same
  # boost, field, bounds and inclusiveness flags.
  def eql?(o)
    o.instance_of?(RangeQuery) &&
      (boost() == o.boost()) &&
      (@include_upper == o.include_upper) &&
      (@include_lower == o.include_lower) &&
      (@upper_term == o.upper_term) &&
      (@lower_term == o.lower_term) &&
      (@field == o.field)
  end
  alias :== :eql?

  # Returns a hash code value for this object, derived from the same
  # attributes that #eql? compares. The attributes are hashed as an ordered
  # list rather than XOR-ed together, because XOR cancels out whenever two
  # attributes are equal (e.g. both inclusive flags, or both bounds).
  def hash
    [boost(), @field, @lower_term, @upper_term,
     @include_lower, @include_upper].hash
  end
end
|
139
|
-
end
|
@@ -1,125 +0,0 @@
|
|
1
|
-
module Ferret::Search
|
2
|
-
# A Scorer for queries with a required subscorer and an excluding (prohibited)
# subscorer.
#
# This +Scorer+ implements Scorer#skip_to(int), and it uses the skip_to() on
# the given scorers.
class ReqExclScorer < Scorer
  # Construct a +ReqExclScorer+.
  #
  # req_scorer::  The scorer that must match, except where
  # excl_scorer:: indicates exclusion.
  #
  # Either instance variable is reset to nil as soon as the corresponding
  # scorer is found to be exhausted.
  def initialize(req_scorer, excl_scorer)
    super(nil) # No similarity used.
    @req_scorer = req_scorer
    @excl_scorer = excl_scorer

    @first_time = true
  end

  # Advances to the next required document that is not excluded.
  #
  # returns:: true iff there is such a document.
  def next?
    if @first_time
      # Position the excluding scorer on its first document.
      @excl_scorer = nil unless @excl_scorer.next? # exhausted at start
      @first_time = false
    end
    return false if @req_scorer.nil?

    unless @req_scorer.next?
      @req_scorer = nil # exhausted, nothing left
      return false
    end
    return true if @excl_scorer.nil? # @req_scorer.next? already returned true

    to_non_excluded
  end

  # Advance to non excluded doc.
  # On entry:
  #
  # * @req_scorer != nil
  # * @excl_scorer != nil
  # * @req_scorer was advanced once via next? or skip_to() and
  #   @req_scorer.doc() may still be excluded.
  #
  # Advances @req_scorer to a non excluded required doc, if any.
  #
  # returns:: true iff there is a non excluded required doc.
  def to_non_excluded
    excl_doc = @excl_scorer.doc
    loop do
      req_doc = @req_scorer.doc # may be excluded
      if req_doc < excl_doc
        return true # @req_scorer advanced to before @excl_scorer, ie. not excluded
      elsif req_doc > excl_doc
        unless @excl_scorer.skip_to(req_doc)
          @excl_scorer = nil # exhausted, no more exclusions
          return true
        end
        excl_doc = @excl_scorer.doc
        return true if excl_doc > req_doc # not excluded
      end
      # req_doc == excl_doc: the document is excluded, try the next one.
      break unless @req_scorer.next?
    end
    @req_scorer = nil # exhausted, nothing left
    false
  end

  # @req_scorer may be nil when next? or skip_to() already return false so
  # only call when you know that a doc exists
  def doc
    @req_scorer.doc
  end

  # Returns the score of the current document matching the query.
  #
  # Initially invalid, until #next? is called the first time.
  #
  # returns:: The score of the required scorer.
  def score
    @req_scorer.score()
  end

  # Skips to the first match beyond the current whose document number is
  # greater than or equal to a given target.
  #
  # When this method is used the #explain(int) method should not be used.
  #
  # target::  The target document number.
  # returns:: true iff there is such a match.
  def skip_to(target)
    if @first_time
      @first_time = false
      @excl_scorer = nil unless @excl_scorer.skip_to(target) # exhausted
    end
    return false if @req_scorer.nil?
    return @req_scorer.skip_to(target) if @excl_scorer.nil?

    unless @req_scorer.skip_to(target)
      @req_scorer = nil
      return false
    end
    to_non_excluded
  end

  # Builds an Explanation stating whether +doc+ is excluded; when it is not,
  # the required scorer's explanation is attached as a detail.
  #
  # The excluding scorer is dropped (set to nil) once it is exhausted, in
  # which case nothing can be excluded any more, so that case is reported as
  # "not excluded" instead of failing on a nil receiver.
  def explain(doc)
    e = Explanation.new()
    if @excl_scorer && @excl_scorer.skip_to(doc) && @excl_scorer.doc == doc
      e.description = "excluded"
    else
      e.description = "not excluded"
      e.details << @req_scorer.explain(doc)
    end
    e
  end
end
|
125
|
-
end
|
@@ -1,70 +0,0 @@
|
|
1
|
-
module Ferret::Search
|
2
|
-
# A Scorer for queries with a required part and an optional part.
# Delays skip_to() on the optional part until a score() is needed.
#
# This +Scorer+ implements Scorer#skip_to(int).
class ReqOptSumScorer < Scorer
  # Construct a +ReqOptScorer+.
  #
  # req_scorer:: The required scorer. This must match.
  # opt_scorer:: The optional scorer. This is used for scoring only.
  #
  # The optional scorer is set to nil as soon as its skip_to() returns false.
  def initialize(req_scorer, opt_scorer)
    super(nil) # No similarity used.
    @req_scorer = req_scorer
    @opt_scorer = opt_scorer

    # The optional scorer is not positioned until the first call to #score.
    @first_time_opt_scorer = true
  end

  # Advances the required scorer; the optional scorer is left untouched.
  def next?
    @req_scorer.next?
  end

  # Skips the required scorer to +target+; the optional scorer is left
  # untouched.
  def skip_to(target)
    @req_scorer.skip_to(target)
  end

  # Returns the current document of the required scorer.
  def doc
    @req_scorer.doc()
  end

  # Returns the score of the current document matching the query.
  # Initially invalid, until #next? is called the first time.
  #
  # returns:: The score of the required scorer, eventually increased by the
  #           score of the optional scorer when it also matches the current
  #           document.
  def score
    cur_doc = @req_scorer.doc
    req_score = @req_scorer.score
    if @first_time_opt_scorer
      @first_time_opt_scorer = false
      unless @opt_scorer.skip_to(cur_doc)
        @opt_scorer = nil # exhausted, only the required part counts from now on
        return req_score
      end
    elsif @opt_scorer.nil?
      return req_score
    elsif @opt_scorer.doc < cur_doc && !@opt_scorer.skip_to(cur_doc)
      @opt_scorer = nil
      return req_score
    end
    # assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc)
    @opt_scorer.doc == cur_doc ? req_score + @opt_scorer.score() : req_score
  end

  # Explain the score of a document.
  # @todo Also show the total score.
  # See BooleanScorer.explain() on how to do this.
  #
  # The optional scorer is dropped (set to nil) by #score once it is
  # exhausted; in that case only the required scorer's explanation is
  # attached, instead of failing on a nil receiver.
  def explain(doc)
    e = Explanation.new()
    e.description = "required, optional"
    e.details << @req_scorer.explain(doc)
    e.details << @opt_scorer.explain(doc) if @opt_scorer
    e
  end
end
|
70
|
-
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
module Ferret::Search
|
2
|
-
# Expert: Returned by low-level search implementations.
# See TopDocs
class ScoreDoc
  include Comparable

  # Expert: The score of this document for the query.
  attr_accessor :score

  # Expert: A hit document's number.
  attr_accessor :doc

  # Expert: Constructs a ScoreDoc.
  #
  # doc::   the hit document's number
  # score:: the score of that document for the query
  def initialize(doc, score)
    @doc = doc
    @score = score
  end

  # Returns a hash value for storage in a Hash. It is always an Integer and
  # is derived from both the document number and the score, so hits on
  # document 0 no longer all share the same value.
  def hash
    [@doc, @score].hash
  end

  # Returns true iff +other+ is a ScoreDoc with the same document number and
  # score. Defined together with #hash so that equal hits behave as the same
  # Hash key.
  def eql?(other)
    other.is_a?(ScoreDoc) && @doc.eql?(other.doc) && @score.eql?(other.score)
  end

  # score_docA < score_docB if score_docA.score < score_docB.score or,
  # when the scores are equal, score_docA.doc > score_docB.doc
  def <=>(other)
    result = @score <=> other.score
    # Ties on score are broken by document number, in reverse order.
    result == 0 ? other.doc <=> @doc : result
  end

  # Renders the hit as "<doc> -> <score>", the score with two decimals.
  def to_s
    "#{@doc} -> %0.2f" % @score
  end
end
|
38
|
-
end
|
@@ -1,114 +0,0 @@
|
|
1
|
-
module Ferret::Search
|
2
|
-
# Expert: Compares two ScoreDoc objects for sorting.
#
# This class is an abstract base: #compare, #sort_value and #sort_type raise
# NotImplementedError unless they are overridden, as the two predefined
# comparators RELEVANCE and INDEX_ORDER do.
class ScoreDocComparator

  # Special comparator for sorting hits according to computed relevance (score).
  # Higher scores sort first.
  RELEVANCE = ScoreDocComparator.new()
  class << RELEVANCE
    def compare(i, j)
      j.score <=> i.score
    end

    def sort_value(i)
      i.score
    end

    def sort_type
      SortField::SortType::SCORE
    end
  end

  # Special comparator for sorting hits according to index order (number).
  # Lower document numbers sort first.
  INDEX_ORDER = ScoreDocComparator.new()
  class << INDEX_ORDER
    def compare(i, j)
      i.doc <=> j.doc
    end

    def sort_value(i)
      i.doc
    end

    def sort_type
      SortField::SortType::DOC
    end
  end

  # Compares two ScoreDoc objects and returns a result indicating their
  # sort order.
  #
  # i::       First ScoreDoc
  # j::       Second ScoreDoc
  # returns:: +-1+ if +i+ should come before +j+
  #           +1+ if +i+ should come after +j+
  #           +0+ if they are equal
  #
  # Raises NotImplementedError unless overridden.
  def compare(i, j)
    raise NotImplementedError
  end

  # Returns the value used to sort the given document. This is used by
  # multisearchers to determine how to collate results from their searchers.
  #
  # See FieldDoc
  # i::       Document
  # returns:: the sort value for the document
  #
  # Raises NotImplementedError unless overridden.
  def sort_value(i)
    raise NotImplementedError
  end

  # Returns the type of sort. Should return +SortField.SCORE+,
  # +SortField.DOC+, +SortField.STRING+, +SortField.INTEGER+,
  # +SortField.FLOAT+ or +SortField.CUSTOM+. It is not valid to return
  # +SortField.AUTO+.
  # This is used by multisearchers to determine how to collate results from
  # their searchers.
  #
  # returns:: One of the constants in SortField.
  # See SortField
  #
  # Raises NotImplementedError unless overridden.
  def sort_type
    raise NotImplementedError
  end
end
|
70
|
-
|
71
|
-
# Comparator that orders hits by the entry stored for each hit's document
# number in +index+, using that entry's own <=>.
class SimpleFieldComparator < ScoreDocComparator
  # The sort type handed to the constructor.
  attr_reader :sort_type

  # index::     lookup from document number to sort value (indexed with #[])
  # sort_type:: the sort type reported by #sort_type
  def initialize(index, sort_type)
    @index, @sort_type = index, sort_type
  end

  # Compares the index entries of the two hits' documents.
  def compare(i, j)
    left = @index[i.doc]
    right = @index[j.doc]
    left <=> right
  end

  # Returns the index entry for the hit's document.
  def sort_value(i)
    @index[i.doc]
  end
end
|
87
|
-
|
88
|
-
# A SimpleFieldComparator whose ordering is delegated to a caller-supplied
# callable instead of the index entries' own <=>.
class SpecialFieldComparator < SimpleFieldComparator
  # index::      lookup from document number to sort value
  # sort_type::  the sort type reported by #sort_type
  # comparator:: object responding to +call(a, b)+, given the two index
  #              entries being compared
  def initialize(index, sort_type, comparator)
    super(index, sort_type)
    @comparator = comparator
  end

  # Hands the index entries of the two hits' documents to the comparator and
  # returns whatever it returns.
  def compare(i, j)
    first = @index[i.doc]
    second = @index[j.doc]
    @comparator.call(first, second)
  end
end
|
97
|
-
|
98
|
-
# Comparator for string fields. Hits are ordered by their document's entry
# in +str_index+; the sort value is looked up in +str_map+ using that entry.
# NOTE(review): the entries are presumably ordinals into str_map -- confirm
# against the code that builds the index.
class StringFieldComparator < ScoreDocComparator
  # index:: object exposing +str_index+ and +str_map+
  def initialize(index)
    @str_index, @str_map = index.str_index, index.str_map
  end

  # Compares the str_index entries of the two hits' documents.
  def compare(i, j)
    left = @str_index[i.doc]
    right = @str_index[j.doc]
    left <=> right
  end

  # Returns the str_map value addressed by the hit's str_index entry.
  def sort_value(i)
    key = @str_index[i.doc]
    @str_map[key]
  end

  # Always the STRING sort type.
  def sort_type
    SortField::SortType::STRING
  end
end
|
114
|
-
end
|