ferret 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,25 @@
1
module Ferret::Search::Spans
  # Abstract parent of the span-based queries. It supplies the weight factory
  # and declares the three hooks (+spans+, +field+, +terms+) that a concrete
  # span query has to provide; each hook raises NotImplementedError here.
  class SpanQuery < Ferret::Search::Query
    # Expert: yields the span matches of this query for the index behind
    # +reader+. Used internally when searching for spans.
    #
    # Raises NotImplementedError unless overridden.
    def spans(reader)
      raise NotImplementedError
    end

    # Name of the field this query matches against.
    #
    # Raises NotImplementedError unless overridden.
    def field
      raise NotImplementedError
    end

    # Collection of every term this query matches.
    #
    # Raises NotImplementedError unless overridden.
    def terms
      raise NotImplementedError
    end

    # Builds the SpanWeight that scores this query for +searcher+.
    def create_weight(searcher)
      SpanWeight.new(self, searcher)
    end
  end
end
25
+
@@ -0,0 +1,74 @@
1
module Ferret::Search::Spans
  # Scorer driven by a spans enumeration. For every document it sums
  # similarity.sloppy_freq(span length) over that document's spans and turns
  # the sum into a score using the weight's value and the field norms.
  class SpanScorer < Ferret::Search::Scorer
    include Ferret::Search

    # spans::      enumeration of span matches to score
    # weight::     weight object; its +value+ is captured at construction time
    # similarity:: similarity implementation, handed to the parent scorer
    # norms::      per-document norms, indexed by document number
    def initialize(spans, weight, similarity, norms)
      @first_time = true
      @more = true

      super(similarity)
      @spans = spans
      @norms = norms
      @weight = weight
      @value = weight.value
      @freq = 0.0
    end

    # Advances to the next matching document. Returns true iff one exists.
    def next?
      if @first_time
        # The enumeration starts before its first match; prime it once.
        @more = @spans.next?
        @first_time = false
      end

      accumulate_current_doc
    end

    # Document number of the current match.
    def doc
      @doc
    end

    # Score of the current document: tf(freq) * weight value, normalised by
    # the decoded field norm of that document.
    def score
      raw = similarity.tf(@freq) * @value # raw score
      raw * Similarity.decode_norm(@norms[@doc])
    end

    # Advances to the first matching document whose number is greater than or
    # equal to +target+. Returns true iff such a document exists.
    #
    # The frequency is accumulated for the document the enumeration actually
    # lands on. Previously only spans located exactly on +target+ were
    # counted, so landing on a later document reported that document with a
    # frequency (and therefore a score) of zero.
    def skip_to(target)
      # skip_to positions the enumeration itself, so next? must not prime it
      # a second time; doing so would drop the first span of the next document.
      @first_time = false
      @more = @spans.skip_to(target)

      accumulate_current_doc
    end

    # Builds the term-frequency explanation for document +doc+.
    #
    # Note that this moves the scorer: it calls skip_to(doc). When the scorer
    # does not end up on +doc+ the reported phrase frequency is 0.0.
    def explain(doc)
      tf_explanation = Explanation.new

      skip_to(doc)

      phrase_freq = (self.doc == doc ? @freq : 0.0)
      tf_explanation.value = similarity.tf(phrase_freq)
      tf_explanation.description = "tf(phrase_freq=#{phrase_freq})"

      tf_explanation
    end

    private

    # Sums the sloppy frequency of every span belonging to the document the
    # enumeration is currently positioned on, leaving the enumeration on the
    # first span of the following document (or exhausted).
    #
    # Returns false when the enumeration was already exhausted on entry.
    def accumulate_current_doc
      return false unless @more

      @freq = 0.0
      @doc = @spans.doc

      while @more && @doc == @spans.doc
        match_length = @spans.finish - @spans.start
        @freq += similarity.sloppy_freq(match_length)
        @more = @spans.next?
      end

      @more || @freq != 0.0
    end
  end
end
@@ -0,0 +1,105 @@
1
module Ferret::Search::Spans
  # Span query whose matches are the occurrences of one term.
  class SpanTermQuery < SpanQuery
    # The term whose spans are matched.
    attr_reader :term

    # Creates a SpanTermQuery matching the spans of +term+.
    def initialize(term)
      super()
      @term = term
    end

    # Field of the wrapped term.
    def field
      @term.field
    end

    # The wrapped term as a one-element array.
    def terms
      [@term]
    end

    # Text of the term when the term's field equals +field+; otherwise the
    # term's own string form.
    def to_s(field = nil)
      @term.field == field ? @term.text : @term.to_s
    end

    # True iff +o+ is exactly a SpanTermQuery with the same boost and term.
    def eql?(o)
      o.instance_of?(SpanTermQuery) && boost == o.boost && @term == o.term
    end
    alias :== :eql?

    # Hash code combining the boost and the term, in line with eql?.
    def hash
      boost.hash ^ @term.hash
    end

    # Enumeration of this query's span matches in +reader+.
    def spans(reader)
      SpanTermEnum.new(self, reader)
    end

    # Spans enumeration over the positions of a single term. Each match is
    # one position wide: it starts at the term position and finishes one
    # past it.
    class SpanTermEnum < SpansEnum
      # Document number of the current match: -1 before the first match and
      # Ferret::Search::Scorer::MAX_DOCS once the enumeration is exhausted.
      attr_reader :doc

      # query::  the SpanTermQuery being enumerated
      # reader:: index reader supplying the term positions for query.term
      def initialize(query, reader)
        @query = query
        @positions = reader.term_positions_for(@query.term)
        @position = -1
        @doc = -1
        @count = 0
        @freq = 0
      end

      # Moves to the next match; true iff there is one.
      def next?
        # Every position of the current document has been consumed (or
        # nothing has been read yet), so move on to the next document.
        if @count == @freq
          unless @positions.next?
            @doc = Ferret::Search::Scorer::MAX_DOCS
            return false
          end

          @doc = @positions.doc
          @freq = @positions.freq
          @count = 0
        end

        @position = @positions.next_position
        @count += 1
        true
      end

      # Moves to the first match in a document numbered +target+ or higher;
      # true iff there is one. Returns true without moving when the current
      # document already satisfies +target+.
      def skip_to(target)
        return true if @doc >= target

        unless @positions.skip_to(target)
          @doc = Ferret::Search::Scorer::MAX_DOCS
          return false
        end

        @doc = @positions.doc
        @freq = @positions.freq
        @count = 0

        # Read the first position of the document that was just reached.
        @position = @positions.next_position
        @count += 1

        true
      end

      # Start position of the current match.
      def start
        @position
      end

      # Finish position of the current match (start + 1).
      def finish
        @position + 1
      end

      # Debug string: "START" before the first match, "END" once exhausted,
      # otherwise "<doc>-<position>".
      def to_s
        prefix = "spans(#{@query})@"
        where =
          if @doc < 0
            "START"
          elsif @doc == Ferret::Search::Scorer::MAX_DOCS
            "END"
          else
            "#{@doc}-#{@position}"
          end
        prefix + where
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
module Ferret::Search::Spans
  # Weight for span queries. Holds the idf of the query's terms, the
  # normalised query weight, and builds the SpanScorer and score explanations.
  class SpanWeight < Ferret::Search::Weight
    include Ferret::Search

    # query::    the span query being weighted
    # searcher:: searcher used to obtain the similarity and the phrase idf
    def initialize(query, searcher)
      @similarity = query.similarity(searcher)
      @query = query
      @terms = query.terms

      @idf = @similarity.idf_phrase(@terms, searcher)
      # Start at 0, as TermWeight does, so +value+ is numeric even before
      # normalize has run.
      @value = 0
    end

    attr_reader :query, :value

    # Computes the query weight (idf * boost) and returns its square.
    def sum_of_squared_weights
      @query_weight = @idf * @query.boost # compute query weight
      @query_weight * @query_weight       # square it
    end

    # Applies +query_norm+ to the query weight and derives +value+ from it.
    # Relies on sum_of_squared_weights having set the query weight first.
    def normalize(query_norm)
      @query_norm = query_norm
      @query_weight *= query_norm    # normalize query weight
      @value = @query_weight * @idf  # idf for document
    end

    # Builds a SpanScorer over the query's spans in +reader+.
    def scorer(reader)
      SpanScorer.new(@query.spans(reader), self,
                     @similarity,
                     reader.get_norms(@query.field))
    end

    # Explains the score of document +doc+ as the product of a query weight
    # and a field weight. When the query weight equals 1.0 only the field
    # weight explanation is returned.
    def explain(reader, doc)
      result = Explanation.new
      result.description = "weight(#{@query} in #{doc}), product of:"
      field = @query.field

      doc_freqs = @terms.map { |t| "#{t.text}=#{reader.doc_freq(t)}" }.join(' ')

      idf_expl = Explanation.new(@idf, "idf(#{field}: #{doc_freqs})")

      # explain query weight
      query_expl = Explanation.new
      query_expl.description = "query_weight(#{@query}), product of:"

      boost_expl = Explanation.new(@query.boost, "boost")
      query_expl << boost_expl if @query.boost != 1.0
      query_expl << idf_expl

      # Fall back to 0.0 when normalize has not run yet (same guard as
      # TermWeight#explain); a nil here would make the product below raise.
      query_norm_expl = Explanation.new(@query_norm || 0.0, "query_norm")
      query_expl << query_norm_expl

      query_expl.value = boost_expl.value * idf_expl.value * query_norm_expl.value

      result << query_expl

      # explain field weight
      field_expl = Explanation.new
      field_expl.description = "field_weight(#{field}:#{@query.to_s(field)}" +
                               " in #{doc}), product of:"

      tf_expl = scorer(reader).explain(doc)
      field_expl << tf_expl
      field_expl << idf_expl

      field_norm_expl = Explanation.new
      field_norms = reader.get_norms(field)
      field_norm = (field_norms ? Similarity.decode_norm(field_norms[doc]) : 0.0)
      field_norm_expl.value = field_norm
      field_norm_expl.description = "field_norm(field=#{field}, doc=#{doc})"
      field_expl << field_norm_expl

      field_expl.value = tf_expl.value * idf_expl.value * field_norm_expl.value

      result << field_expl

      # combine them
      result.value = query_expl.value * field_expl.value

      # A query weight of exactly 1.0 contributes nothing to the product.
      return field_expl if query_expl.value == 1.0

      result
    end
  end
end
@@ -0,0 +1,44 @@
1
module Ferret::Search::Spans
  # Expert: an enumeration of span matches, used to implement span searching.
  # A span is a range of term positions inside one document. Matches come in
  # order: by increasing document number, within that by increasing start
  # position and finally by increasing finish position.
  #
  # Every method here raises NotImplementedError; concrete enumerations
  # override all of them.
  class SpansEnum
    # Advance to the next match. Returns true iff there is one.
    def next?
      raise NotImplementedError
    end

    # Advance to the first match beyond the current one whose document number
    # is greater than or equal to _target_. Returns true iff there is such a
    # match. Behaves as if written:
    #
    #   def skip_to(target)
    #     begin
    #       return false if (!next?)
    #     end while (target > doc)
    #     return true
    #   end
    #
    # Most implementations are considerably more efficient than that.
    def skip_to(target)
      raise NotImplementedError
    end

    # Document number of the current match. Initially invalid.
    def doc
      raise NotImplementedError
    end

    # Start position of the current match. Initially invalid.
    def start
      raise NotImplementedError
    end

    # Finish position of the current match. Initially invalid.
    def finish
      raise NotImplementedError
    end
  end
end
@@ -0,0 +1,128 @@
1
module Ferret::Search
  # A Query that matches documents containing a term.
  # It may be combined with other terms through a BooleanQuery.
  class TermQuery < Query

    # The term this query matches.
    attr_reader :term

    # Weight for a TermQuery: holds the term's idf and the normalised query
    # weight, and builds the TermScorer and score explanations.
    class TermWeight < Weight
      attr_reader :value, :query

      # query::    the TermQuery being weighted
      # searcher:: supplies the term's document frequency and max_doc for idf
      def initialize(query, searcher)
        @similarity = query.similarity(searcher)
        doc_freq = searcher.doc_freq(query.term)
        @idf = @similarity.idf(doc_freq, searcher.max_doc) # compute idf
        @query = query
        @value = 0
      end

      def to_s
        "TermWeight(#{@value})"
      end

      # Computes the query weight (idf * boost) and returns its square.
      def sum_of_squared_weights
        @query_weight = @idf * @query.boost # compute query weight
        @query_weight * @query_weight       # square it
      end

      # Applies +query_norm+ to the query weight and derives +value+ from it.
      def normalize(query_norm)
        @query_norm = query_norm
        @query_weight *= query_norm    # normalize query weight
        @value = @query_weight * @idf  # idf for document
      end

      # Builds a TermScorer for the query's term in +reader+, or returns nil
      # when the reader hands back no term docs.
      def scorer(reader)
        term_docs = reader.term_docs_for(@query.term)
        return nil if term_docs.nil?

        norms = reader.get_norms(@query.term.field)
        TermScorer.new(self, term_docs, @similarity, norms)
      end

      # Explains the score of document +doc+ as the product of a query weight
      # and a field weight. When the query weight equals 1.0 only the field
      # weight explanation is returned.
      def explain(reader, doc)
        explanation = Explanation.new
        explanation.description = "weight(#{@query} in #{doc}), product of:"

        idf_expl = Explanation.new(@idf, "idf(doc_freq=#{reader.doc_freq(@query.term)})")

        # explain query weight
        query_expl = Explanation.new(nil, "query_weight(#{@query}), product of:")

        boost_expl = Explanation.new(@query.boost, "boost")
        query_expl << boost_expl if @query.boost != 1.0
        query_expl << idf_expl

        # 0.0 stands in for the norm when normalize has not run yet.
        query_norm_expl = Explanation.new(@query_norm || 0.0, "query_norm")
        query_expl << query_norm_expl

        query_expl.value = boost_expl.value * idf_expl.value * query_norm_expl.value

        explanation << query_expl

        # explain field weight
        field_name = @query.term.field
        field_expl = Explanation.new
        field_expl.description = "field_weight(#{@query.term} in #{doc}), product of:"

        # NOTE(review): scorer(reader) returns nil when the reader has no term
        # docs, in which case this call raises NoMethodError. Confirm whether
        # readers can actually return nil here.
        tf_expl = scorer(reader).explain(doc)
        field_expl << tf_expl
        field_expl << idf_expl

        field_norms = reader.get_norms(field_name)
        field_norm =
          if field_norms.nil?
            0.0
          else
            Similarity.decode_norm(field_norms[doc])
          end
        field_norm_expl = Explanation.new(field_norm,
                                          "field_norm(field=#{field_name}, doc=#{doc})")
        field_expl << field_norm_expl

        field_expl.value = tf_expl.value * idf_expl.value * field_norm_expl.value
        explanation << field_expl

        # combine them
        explanation.value = query_expl.value * field_expl.value

        # A query weight of exactly 1.0 contributes nothing to the product.
        return field_expl if query_expl.value == 1.0

        explanation
      end
    end

    # Constructs a query for the term +t+.
    def initialize(t)
      super()
      @term = t
    end

    # Builds the TermWeight that scores this query for +searcher+.
    def create_weight(searcher)
      TermWeight.new(self, searcher)
    end

    # Appends this query's term to +terms+.
    def extract_terms(terms)
      terms << @term
    end

    # Prints a user-readable version of this query: the term text, prefixed
    # with "<field>:" unless +field+ is the term's field, and suffixed with
    # "^<boost>" unless the boost is 1.0.
    def to_s(field = nil)
      prefix = (field != @term.field ? "#{@term.field}:" : "")
      text = "#{@term.text}"
      suffix = (@boost != 1.0 ? "^#{@boost}" : "")
      prefix + text + suffix
    end

    # Returns true iff +other+ is exactly a TermQuery with the same boost and
    # term.
    def eql?(other)
      other.instance_of?(TermQuery) && @boost == other.boost && @term == other.term
    end
    alias :== :eql?

    # Returns a hash code value for this object, in line with eql?.
    def hash
      @boost.hash ^ @term.hash
    end

  end
end