ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
@@ -1,25 +0,0 @@
|
|
1
|
-
module Ferret::Search::Spans
  # Abstract parent of the span-based queries. Subclasses are expected to
  # override #spans, #field and #terms; the weight construction is shared.
  class SpanQuery < Ferret::Search::Query
    # Expert: enumerates this query's matches in the index read through
    # +reader+. Used internally when searching for spans.
    #
    # Raises NotImplementedError unless a subclass overrides it.
    def spans(reader)
      raise NotImplementedError
    end

    # Name of the field this query matches against.
    #
    # Raises NotImplementedError unless a subclass overrides it.
    def field
      raise NotImplementedError
    end

    # Collection of all the terms this query matches.
    #
    # Raises NotImplementedError unless a subclass overrides it.
    def terms
      raise NotImplementedError
    end

    # Builds the SpanWeight for this query and the given +searcher+.
    def create_weight(searcher)
      SpanWeight.new(self, searcher)
    end
  end
end
|
25
|
-
|
@@ -1,74 +0,0 @@
|
|
1
|
-
module Ferret::Search::Spans
  # Scores documents from an enumeration of span matches. For each document
  # the sloppy frequencies of all of its spans are summed into a single
  # frequency, which #score turns into a normalized score.
  class SpanScorer < Ferret::Search::Scorer
    include Ferret::Search

    # spans::      enumeration of span matches (responds to next?, skip_to,
    #              doc, start and finish)
    # weight::     weight object; its +value+ scales every raw score
    # similarity:: handed to the Scorer superclass and used for tf and
    #              sloppy_freq
    # norms::      per-document norms, indexed by document number
    def initialize(spans, weight, similarity, norms)
      @first_time = true
      @more = true

      super(similarity)
      @spans = spans
      @norms = norms
      @weight = weight
      @value = weight.value()
      @freq = 0.0
    end

    # Advances to the next matching document. Returns true iff one exists.
    def next?
      if @first_time
        @more = @spans.next?
        @first_time = false
      end

      return false unless @more

      sum_freq_for_current_doc
    end

    # Document number of the current match.
    def doc() return @doc end

    # Score of the current document: tf of the summed span frequency times
    # the weight value, scaled by the decoded norm of the document.
    def score()
      raw = similarity().tf(@freq) * @value # raw score
      # normalize
      return raw * Similarity.decode_norm(@norms[@doc])
    end

    # Moves to the first matching document whose number is greater than or
    # equal to +target+. Returns true iff such a document exists.
    #
    # The span enumeration may land on a document beyond +target+, so the
    # frequency is summed for whatever document it actually reached rather
    # than for +target+ only. @first_time is cleared here as well, so that a
    # later #next? does not advance the enumeration a second time and drop
    # a span.
    def skip_to(target)
      if @first_time
        @more = @spans.skip_to(target)
        @first_time = false
      elsif @more && @spans.doc < target
        # After a previous #next?/#skip_to the enumeration already sits on
        # the first span of the following document; only skip when that
        # document is still behind the target.
        @more = @spans.skip_to(target)
      end

      return false unless @more

      sum_freq_for_current_doc
    end

    # Builds the tf explanation for document +doc+. The frequency is 0.0
    # when the scorer cannot be positioned on +doc+.
    def explain(doc)
      tf_explanation = Explanation.new()

      skip_to(doc)

      # doc() is the scorer position, doc the requested document
      phrase_freq = ((doc() == doc) ? @freq : 0.0)
      tf_explanation.value = similarity().tf(phrase_freq)
      tf_explanation.description = "tf(phrase_freq=#{phrase_freq})"

      return tf_explanation
    end

    private

    # Sums the sloppy frequency of every span in the document the
    # enumeration is currently positioned on, leaving the enumeration on
    # the first span of the following document (or exhausted). Must only be
    # called while @more is true. Returns true iff a document was scored.
    def sum_freq_for_current_doc
      @freq = 0.0
      @doc = @spans.doc

      while @more && @spans.doc == @doc
        match_length = @spans.finish - @spans.start
        @freq += similarity().sloppy_freq(match_length)
        @more = @spans.next?
      end

      @more || @freq != 0.0
    end
  end
end
|
@@ -1,105 +0,0 @@
|
|
1
|
-
module Ferret::Search::Spans
  # Span query over a single term: each occurrence of the term is one span,
  # running from its position to the position after it.
  class SpanTermQuery < SpanQuery
    # The term whose spans are matched.
    attr_reader :term

    # Creates a SpanTermQuery matching the spans of +term+.
    def initialize(term)
      super()
      @term = term
    end

    # Field of the matched term.
    def field
      @term.field
    end

    # The matched term, wrapped in an array.
    def terms
      [@term]
    end

    # Only the term text when +field+ is the term's own field, otherwise the
    # term's full string form.
    def to_s(field = nil)
      @term.field == field ? @term.text : @term.to_s
    end

    # True iff +o+ is exactly a SpanTermQuery with the same boost and term.
    def eql?(o)
      o.instance_of?(SpanTermQuery) && boost == o.boost && @term == o.term
    end
    alias :== :eql?

    # Hash code combining the boost and the term.
    def hash
      boost.hash ^ @term.hash
    end

    # Enumeration of this query's spans in +reader+.
    def spans(reader)
      SpanTermEnum.new(self, reader)
    end

    # Walks the positions of the query term, document by document, exposing
    # every position as a span.
    class SpanTermEnum < SpansEnum
      # query::  the SpanTermQuery being enumerated
      # reader:: index reader that supplies the term positions
      def initialize(query, reader)
        @query = query
        @positions = reader.term_positions_for(@query.term)
        @position = -1
        @doc = -1
        @count = 0
        @freq = 0
      end

      # Steps to the next position, moving on to the next document once all
      # positions of the current one have been read. Returns false when the
      # positions are exhausted.
      def next?
        if @count == @freq
          return mark_exhausted unless @positions.next?
          load_current_doc
        end
        read_position
      end

      # Moves to the first position of the first document numbered +target+
      # or higher. Does nothing when already on such a document.
      def skip_to(target)
        return true if @doc >= target
        return mark_exhausted unless @positions.skip_to(target)

        load_current_doc
        read_position
      end

      # Document number of the current span.
      def doc
        @doc
      end

      # Start position of the current span.
      def start
        @position
      end

      # Finish position of the current span: one past its start.
      def finish
        @position + 1
      end

      # Describes the enumeration state: START before the first match, END
      # once exhausted, otherwise "doc-position".
      def to_s
        state =
          if @doc < 0
            "START"
          elsif @doc == Ferret::Search::Scorer::MAX_DOCS
            "END"
          else
            "#{@doc}-#{@position}"
          end
        "spans(#{@query})@" << state
      end

      private

      # Flags the enumeration as finished and reports failure.
      def mark_exhausted
        @doc = Ferret::Search::Scorer::MAX_DOCS
        false
      end

      # Takes over document number and frequency from the positions
      # enumeration and restarts the per-document position counter.
      def load_current_doc
        @doc = @positions.doc
        @freq = @positions.freq
        @count = 0
      end

      # Reads the next position of the current document and counts it.
      def read_position
        @position = @positions.next_position
        @count += 1
        true
      end
    end
  end
end
|
@@ -1,84 +0,0 @@
|
|
1
|
-
module Ferret::Search::Spans
  # Weight of a span query: holds the idf of the query's terms, the
  # normalized query weight derived from it, and builds scorers and score
  # explanations.
  class SpanWeight < Ferret::Search::Weight
    include Ferret::Search

    attr_reader :query, :value

    # query::    the span query being weighted
    # searcher:: searcher used to obtain the similarity and the phrase idf
    def initialize(query, searcher)
      @similarity = query.similarity(searcher)
      @query = query
      @terms = query.terms()

      @idf = @similarity.idf_phrase(@terms, searcher)
    end

    # Computes the query weight (idf * boost) and returns its square.
    def sum_of_squared_weights()
      @query_weight = @idf * @query.boost() # compute query weight
      return @query_weight * @query_weight  # square it
    end

    # Applies +query_norm+ to the query weight and derives +value+ from it.
    # Relies on #sum_of_squared_weights having set the query weight.
    def normalize(query_norm)
      @query_norm = query_norm
      @query_weight *= query_norm   # normalize query weight
      @value = @query_weight * @idf # idf for document
    end

    # Builds a SpanScorer over the query's spans in +reader+.
    def scorer(reader)
      return SpanScorer.new(@query.spans(reader), self,
                            @similarity,
                            reader.get_norms(@query.field))
    end

    # Explains the score of document +doc+ as the product of a query weight
    # and a field weight. Returns the field weight explanation alone when
    # the query weight is exactly 1.0.
    def explain(reader, doc)
      result = Explanation.new()
      result.description = "weight(#{@query} in #{doc}), product of:"
      field = @query.field

      doc_freqs = @terms.map {|t| "#{t.text}=#{reader.doc_freq(t)}"}.join(' ')

      idf_expl = Explanation.new(@idf, "idf(#{field}: #{doc_freqs})")

      # explain query weight
      query_expl = Explanation.new()
      query_expl.description = "query_weight(#{@query}), product of:"

      boost_expl = Explanation.new(@query.boost, "boost")
      query_expl << boost_expl if (@query.boost != 1.0)
      query_expl << idf_expl

      # @query_norm is nil until #normalize has run; fall back to 0.0, as
      # TermWeight#explain does, so the product below cannot hit nil.
      query_norm_expl = Explanation.new(@query_norm || 0.0, "query_norm")
      query_expl << query_norm_expl

      query_expl.value = boost_expl.value * idf_expl.value * query_norm_expl.value

      result << query_expl

      # explain field weight
      field_expl = Explanation.new()
      field_expl.description = "field_weight(#{field}:#{@query.to_s(field)}"+
                               " in #{doc}), product of:"

      tf_expl = scorer(reader).explain(doc)
      field_expl << tf_expl
      field_expl << idf_expl

      field_norm_expl = Explanation.new()
      field_norms = reader.get_norms(field)
      # a field without norms contributes a norm of 0.0
      field_norm = (field_norms ? Similarity.decode_norm(field_norms[doc]) : 0.0)
      field_norm_expl.value = field_norm
      field_norm_expl.description = "field_norm(field=#{field}, doc=#{doc})"
      field_expl << field_norm_expl

      field_expl.value = tf_expl.value * idf_expl.value * field_norm_expl.value

      result << field_expl

      # combine them
      result.value = query_expl.value * field_expl.value

      if (query_expl.value == 1.0)
        return field_expl
      end
      return result
    end
  end
end
|
@@ -1,44 +0,0 @@
|
|
1
|
-
module Ferret::Search::Spans
  # Expert: the interface of a span match enumeration, the building block of
  # span searching. A span is a range of term positions inside one document.
  # Matches come out ordered by document number, then by start position and
  # finally by finish position, all increasing.
  #
  # Every method here raises NotImplementedError; subclasses provide the
  # real behaviour.
  class SpansEnum
    # Advances to the next match. Returns true iff there is one.
    def next?
      raise NotImplementedError
    end

    # Advances to the first match after the current one whose document
    # number is at least _target_. Returns true iff there is such a match.
    # Equivalent to:
    #
    #   def skip_to(target)
    #     begin
    #       return false if (!next?)
    #     end while (target > doc)
    #     return true
    #   end
    #
    # although most implementations do this far more efficiently.
    def skip_to(target)
      raise NotImplementedError
    end

    # Document number of the current match. Invalid before the first match.
    def doc
      raise NotImplementedError
    end

    # Start position of the current match. Invalid before the first match.
    def start
      raise NotImplementedError
    end

    # Finish position of the current match. Invalid before the first match.
    def finish
      raise NotImplementedError
    end
  end
end
|
@@ -1,128 +0,0 @@
|
|
1
|
-
module Ferret::Search
  # A Query that matches documents containing a term.
  # This may be combined with other terms with a BooleanQuery.
  class TermQuery < Query

    # The term this query matches.
    attr_reader :term

    # Weight of a TermQuery: holds the idf of the term and the normalized
    # query weight, and builds scorers and score explanations.
    class TermWeight < Weight
      attr_reader :value, :query

      # query::    the TermQuery being weighted
      # searcher:: searcher used to obtain the similarity, the document
      #            frequency of the term and the document count
      def initialize(query, searcher)
        @similarity = query.similarity(searcher)
        @idf = @similarity.idf(searcher.doc_freq(query.term),
                               searcher.max_doc) # compute idf
        @query = query
        @value = 0
      end

      def to_s() return "TermWeight(#{@value})"; end

      # Computes the query weight (idf * boost) and returns its square.
      def sum_of_squared_weights()
        @query_weight = @idf * @query.boost() # compute query weight
        return @query_weight * @query_weight  # square it
      end

      # Applies +query_norm+ to the query weight and derives +value+ from
      # it. Relies on #sum_of_squared_weights having set the query weight.
      def normalize(query_norm)
        @query_norm = query_norm
        @query_weight *= query_norm   # normalize query weight
        @value = @query_weight * @idf # idf for document
      end

      # Builds a TermScorer for the query term in +reader+, or returns nil
      # when the reader has no term docs for it.
      def scorer(reader)
        term_docs = reader.term_docs_for(@query.term)

        return nil if term_docs.nil?

        return TermScorer.new(self, term_docs, @similarity,
                              reader.get_norms(@query.term.field))
      end

      # Explains the score of document +doc+ as the product of a query
      # weight and a field weight. Returns the field weight explanation
      # alone when the query weight is exactly 1.0.
      def explain(reader, doc)
        explanation = Explanation.new()
        explanation.description = "weight(#{@query} in #{doc}), product of:"

        idf_expl = Explanation.new(@idf, "idf(doc_freq=#{reader.doc_freq(@query.term)})")

        # explain query weight
        query_expl = Explanation.new(nil, "query_weight(#{@query}), product of:")

        boost_expl = Explanation.new(@query.boost(), "boost")
        if (@query.boost() != 1.0)
          query_expl << boost_expl
        end
        query_expl << idf_expl

        # @query_norm is nil until #normalize has run
        query_norm_expl = Explanation.new(@query_norm||0.0,"query_norm")
        query_expl << query_norm_expl

        query_expl.value = boost_expl.value * idf_expl.value * query_norm_expl.value

        explanation << query_expl

        # explain field weight
        field_name = @query.term.field
        field_expl = Explanation.new()
        field_expl.description = "field_weight(#{@query.term} in #{doc}), product of:"

        # #scorer returns nil when the reader has no term docs for the term;
        # explain that as a term frequency of zero instead of calling
        # #explain on nil.
        term_scorer = scorer(reader)
        tf_expl =
          if term_scorer
            term_scorer.explain(doc)
          else
            Explanation.new(0.0, "tf(term_freq(#{@query.term})=0)")
          end
        field_expl << (tf_expl)
        field_expl << (idf_expl)

        field_norms = reader.get_norms(field_name)
        # a field without norms contributes a norm of 0.0
        field_norm = field_norms.nil? ? 0.0 : Similarity.decode_norm(field_norms[doc])
        field_norm_expl = Explanation.new(field_norm,
                                          "field_norm(field=#{field_name}, doc=#{doc})")
        field_expl << field_norm_expl

        field_expl.value = tf_expl.value * idf_expl.value * field_norm_expl.value
        explanation << field_expl

        # combine them
        explanation.value = (query_expl.value * field_expl.value)

        if (query_expl.value == 1.0)
          return field_expl
        end

        return explanation
      end
    end

    # Constructs a query for the term +t+.
    def initialize(t)
      super()
      @term = t
    end

    # Builds the TermWeight for this query and the given +searcher+.
    def create_weight(searcher)
      return TermWeight.new(self, searcher)
    end

    # Appends this query's term to +terms+.
    def extract_terms(terms)
      terms << @term
    end

    # Prints a user-readable version of this query. The field prefix is left
    # out when +field+ is the term's own field, and the boost suffix when
    # the boost is 1.0.
    def to_s(field = nil)
      buffer = ""
      buffer << "#{@term.field}:" if field != @term.field
      buffer << "#{@term.text}"
      buffer << "^#{@boost}" if @boost != 1.0
      return buffer
    end

    # Returns true iff +other+ is exactly a TermQuery with the same boost
    # and term.
    def eql?(other)
      return false if not other.instance_of?(TermQuery)
      return (@boost == other.boost and @term == other.term)
    end
    alias :== :eql?

    # Returns a hash code value for this object.
    def hash()
      return @boost.hash ^ @term.hash
    end

  end
end
|