ferret 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202):
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,49 @@
1
+ require 'ferret/search/similarity.rb'
2
+ require 'ferret/search/boolean_clause.rb'
3
+ require 'ferret/search/scorer.rb'
4
+ require 'ferret/search/score_doc.rb'
5
+ require 'ferret/search/score_doc_comparator.rb'
6
+ require 'ferret/search/weight.rb'
7
+ require 'ferret/search/query.rb'
8
+ require 'ferret/search/term_query.rb'
9
+ require 'ferret/search/term_scorer.rb'
10
+ require 'ferret/search/top_docs.rb'
11
+ require 'ferret/search/boolean_query.rb'
12
+ require 'ferret/search/conjunction_scorer.rb'
13
+ require 'ferret/search/disjunction_sum_scorer.rb'
14
+ require 'ferret/search/multi_term_query.rb'
15
+ require 'ferret/search/phrase_query.rb'
16
+ require 'ferret/search/multi_phrase_query.rb'
17
+ require 'ferret/search/prefix_query.rb'
18
+ require 'ferret/search/range_query.rb'
19
+ require 'ferret/search/filtered_term_enum.rb'
20
+ require 'ferret/search/wildcard_term_enum.rb'
21
+ require 'ferret/search/wildcard_query.rb'
22
+ require 'ferret/search/fuzzy_term_enum.rb'
23
+ require 'ferret/search/fuzzy_query.rb'
24
+ require 'ferret/search/phrase_positions.rb'
25
+ require 'ferret/search/phrase_scorer.rb'
26
+ require 'ferret/search/exact_phrase_scorer.rb'
27
+ require 'ferret/search/sloppy_phrase_scorer.rb'
28
+ require 'ferret/search/boolean_scorer.rb'
29
+ require 'ferret/search/explanation.rb'
30
+ require 'ferret/search/field_doc.rb'
31
+ require 'ferret/search/hit_collector.rb'
32
+ require 'ferret/search/hit_queue.rb'
33
+ require 'ferret/search/non_matching_scorer.rb'
34
+ require 'ferret/search/req_excl_scorer.rb'
35
+ require 'ferret/search/req_opt_sum_scorer.rb'
36
+ require 'ferret/search/score_doc.rb'
37
+ require 'ferret/search/score_doc_comparator.rb'
38
+ require 'ferret/search/sort_field.rb'
39
+ require 'ferret/search/sort.rb'
40
+ require 'ferret/search/field_cache.rb'
41
+ require 'ferret/search/field_sorted_hit_queue.rb'
42
+ require 'ferret/search/filter.rb'
43
+ require 'ferret/search/range_filter.rb'
44
+ require 'ferret/search/query_filter.rb'
45
+ require 'ferret/search/caching_wrapper_filter.rb'
46
+ require 'ferret/search/filtered_query.rb'
47
+ require 'ferret/search/match_all_docs_query.rb'
48
+ require 'ferret/search/spans.rb'
49
+ require 'ferret/search/index_searcher.rb'
@@ -0,0 +1,100 @@
1
+
2
module Ferret::Search

  # A clause in a BooleanQuery: a sub-query paired with an Occur value that
  # states whether documents must, should or must not match that sub-query.
  class BooleanClause

    # Parameter subclass whose three constants (MUST, SHOULD and MUST_NOT)
    # name the ways a clause can take part in a BooleanQuery.
    class Occur < Ferret::Utils::Parameter

      # Returns the prefix used when a clause is printed: "+" for MUST, "-"
      # for MUST_NOT and the empty string for anything else.
      def to_s()
        return "+" if (self == MUST)
        return "-" if (self == MUST_NOT)
        return ""
      end

      # Use this operator for terms that _must_ appear in the matching
      # documents.
      MUST = Occur.new("MUST")

      # Use this operator for terms that _should_ appear in the matching
      # documents. For a BooleanQuery with two +SHOULD+ subqueries, at
      # least one of the queries must appear in the matching documents.
      SHOULD = Occur.new("SHOULD")

      # Use this operator for terms that _must not_ appear in the matching
      # documents. Note that it is not possible to search for queries that
      # only consist of a +MUST_NOT+ query.
      MUST_NOT = Occur.new("MUST_NOT")
    end

    # The query whose matching documents are combined by the boolean query.
    attr_accessor :query

    # If true, documents which _do not_ match this sub-query will _not_ match
    # the boolean query. Assigning this flag directly does not change #occur.
    attr_writer :required
    def required?
      @required
    end

    # If true, documents which _do_ match this sub-query will _not_ match the
    # boolean query. Assigning this flag directly does not change #occur.
    attr_writer :prohibited
    def prohibited?
      @prohibited
    end

    # See BooleanClause::Occur for values for this attribute.
    attr_reader :occur

    # Sets the operator and re-derives the required/prohibited flags from it.
    #
    # raises:: ArgumentError if +occur+ is not one of the Occur constants
    def occur=(occur)
      @occur = occur
      set_fields(occur)
    end

    # Constructs a BooleanClause. Default value for occur is Occur::SHOULD
    #
    # query:: the sub-query this clause wraps
    # occur:: one of Occur::MUST, Occur::SHOULD or Occur::MUST_NOT
    # raises:: ArgumentError if +occur+ is not one of the Occur constants
    def initialize(query, occur = Occur::SHOULD)
      @query = query
      @occur = occur
      set_fields(occur)
    end

    # Returns true iff +other+ is exactly a BooleanClause (subclasses do not
    # count) with an equal query and the same required/prohibited flags.
    def eql?(other)
      return false unless other.instance_of?(BooleanClause)

      return (@query == other.query &&
              @required == other.required? &&
              @prohibited == other.prohibited?)
    end
    alias :== :eql?

    # Returns a hash code value for this object, built from the same fields
    # that eql? compares.
    def hash()
      return @query.hash() ^ (@required ? 1 : 0) ^ (@prohibited ? 2 : 0)
    end

    # Represents the clause as a string: the operator prefix followed by the
    # query's own string form.
    def to_s()
      return @occur.to_s() + @query.to_s()
    end

    private

      # Derives @required and @prohibited from +occur+.
      #
      # raises:: ArgumentError if +occur+ is not one of the Occur constants
      def set_fields(occur)
        if (occur == Occur::MUST)
          @required = true
          @prohibited = false
        elsif (occur == Occur::SHOULD)
          @required = false
          @prohibited = false
        elsif (occur == Occur::MUST_NOT)
          @required = false
          @prohibited = true
        else
          # Interpolate rather than concatenate: String#+ raises TypeError
          # for a non-String +occur+, which would hide the ArgumentError.
          raise ArgumentError, "Unknown operator #{occur}"
        end
      end
  end
end
@@ -0,0 +1,303 @@
1
module Ferret::Search
  # A Query that matches documents matching boolean combinations of other
  # queries, e.g. TermQuerys, PhraseQuerys or other BooleanQuerys.
  class BooleanQuery < Query

    # Per-instance accessor kept for backward compatibility. add_clause does
    # not consult it; the limit it enforces is the class-wide value read and
    # written through BooleanQuery.max_clause_count.
    attr_accessor :max_clause_count

    # The BooleanClause objects that make up this query, in the order added.
    attr_accessor :clauses

    DEFAULT_MAX_CLAUSE_COUNT = 1024

    # The maximum number of clauses permitted. Default value is 1024.
    #
    # TermQuery clauses are generated from for example prefix queries and
    # fuzzy queries. Each TermQuery needs some buffer space during search,
    # so this parameter indirectly controls the maximum buffer requirements
    # for query search.
    #
    # When this parameter becomes a bottleneck for a Query one can use a
    # Filter. For example instead of a RangeQuery one can use a RangeFilter.
    #
    # Attempts to add more than the permitted number of clauses cause
    # TooManyClauses to be raised.
    @@max_clause_count = DEFAULT_MAX_CLAUSE_COUNT
    def BooleanQuery.max_clause_count
      return @@max_clause_count
    end
    def BooleanQuery.max_clause_count=(mcc)
      @@max_clause_count = mcc
    end

    # Raised when an attempt is made to add more than
    # BooleanQuery.max_clause_count clauses. This typically happens if a
    # PrefixQuery, FuzzyQuery, WildcardQuery, or RangeQuery is expanded to
    # many terms during search.
    #
    # Derives from StandardError so that an ordinary +rescue+ can handle it;
    # rescuing TooManyClauses or Exception explicitly keeps working.
    class TooManyClauses < StandardError
    end

    # Constructs an empty boolean query.
    #
    # Similarity#coord(int,int) may be disabled in scoring, as appropriate.
    # For example, this score factor does not make sense for most automatically
    # generated queries, like WildcardQuery and FuzzyQuery.
    #
    # coord_disabled:: disables Similarity#coord(int,int) in scoring.
    def initialize(coord_disabled = false)
      super()
      @coord_disabled = coord_disabled
      @clauses = []
    end

    # Returns true iff Similarity#coord(int,int) is disabled in scoring for
    # this query instance. See #initialize.
    def coord_disabled?()
      return @coord_disabled
    end

    # Returns the similarity obtained from the superclass. When coord is
    # disabled, a singleton +coord+ that always returns 1.0 is first defined
    # on that object.
    #
    # NOTE(review): the singleton method is defined on whatever object
    # +super+ returns. If that object is shared between queries, coord would
    # be disabled for all of them -- confirm against Query#similarity.
    def similarity(searcher)
      sim = super
      if (@coord_disabled) # disable coord as requested
        class <<sim
          def coord(overlap, max_overlap)
            return 1.0
          end
        end
      end
      return sim
    end

    # Adds a clause to a boolean query. Clauses may be:
    #
    # required::   which means that documents which _do not_ match this
    #              sub-query will _not_ match the boolean query
    # prohibited:: which means that documents which _do_ match this
    #              sub-query will _not_ match the boolean query; or
    # neither::    in which case matched documents are neither prohibited
    #              from nor required to match the sub-query. However, a
    #              document must match at least 1 sub-query to match the
    #              boolean query.
    #
    # * For +required+ use add_query(query, BooleanClause::Occur::MUST)
    # * For +prohibited+ use add_query(query, BooleanClause::Occur::MUST_NOT)
    # * For +neither+ use add_query(query, BooleanClause::Occur::SHOULD)
    #
    # raises:: TooManyClauses if the new number of clauses exceeds the
    #          maximum clause number. See BooleanQuery.max_clause_count
    def add_query(query, occur)
      add_clause(BooleanClause.new(query, occur))
    end

    # Adds a clause to a boolean query.
    #
    # raises:: TooManyClauses if the new number of clauses exceeds the
    #          maximum clause number. See BooleanQuery.max_clause_count
    def add_clause(clause)
      if @clauses.size >= @@max_clause_count
        raise TooManyClauses
      end

      @clauses << clause
    end
    alias :<< :add_clause

    # Weight for a BooleanQuery. Holds one sub-weight per clause, stored in
    # the same order as the query's clauses so both can be matched by index.
    class BooleanWeight < Weight
      attr_accessor :similarity
      attr_accessor :weights
      attr_reader :query

      # Creates a sub-weight for every clause of +query+ using +searcher+.
      def initialize(query, searcher)
        @query = query
        @weights = []

        @similarity = query.similarity(searcher)
        query.clauses.each do |clause|
          @weights << clause.query.create_weight(searcher)
        end
      end

      # Returns the boost of the underlying query.
      def value()
        return @query.boost()
      end

      # Sums the squared weights of all non-prohibited clauses and scales
      # the total by the square of the query boost.
      def sum_of_squared_weights()
        sum = 0
        @weights.each_with_index do |weight, i|
          clause = @query.clauses[i]
          if not clause.prohibited?
            sum += weight.sum_of_squared_weights()  # sum sub weights
          end
        end

        sum *= @query.boost() * @query.boost()      # boost each sub-weight

        return sum
      end

      # Multiplies +norm+ by the query boost and passes the result to the
      # sub-weight of every non-prohibited clause.
      def normalize(norm)
        norm *= @query.boost()
        @weights.each_with_index do |weight, i|
          clause = @query.clauses[i]
          if not clause.prohibited?
            weight.normalize(norm)
          end
        end
      end

      # Builds a BooleanScorer holding the scorer of each clause.
      #
      # returns:: An alternative Scorer that uses and provides skip_to(),
      #           and scores documents in document number order. Returns nil
      #           when a required clause has no scorer.
      def scorer(reader)
        result = BooleanScorer.new(@similarity)

        @weights.each_with_index do |weight, i|
          clause = @query.clauses[i]
          sub_scorer = weight.scorer(reader)
          if (sub_scorer != nil)
            result.add_scorer(sub_scorer, clause.occur)
          elsif (clause.required?())
            return nil # a required clause without a scorer: nothing to score
          end
        end

        return result
      end

      # Explains the score of +doc+ as the sum of the explanations of the
      # matching non-prohibited clauses, multiplied by the coord factor when
      # that factor is not 1.0. Returns a zero-valued explanation as soon as
      # a prohibited clause matches or a required clause does not.
      def explain(reader, doc)
        sum_expl = Explanation.new()
        sum_expl.description = "sum of:"
        coord = 0      # number of non-prohibited clauses that matched
        max_coord = 0  # number of non-prohibited clauses overall
        sum = 0.0

        @weights.each_with_index do |weight, i|
          clause = @query.clauses[i]
          explanation = weight.explain(reader, doc)
          max_coord += 1 if not clause.prohibited?
          if explanation.value > 0
            if not clause.prohibited?
              sum_expl << explanation
              sum += explanation.value
              coord += 1
            else
              return Explanation.new(0.0, "match prohibited")
            end
          elsif clause.required?
            return Explanation.new(0.0, "match required")
          end
        end
        sum_expl.value = sum

        if (coord == 1)                      # only one clause matched
          sum_expl = sum_expl.details[0]     # eliminate wrapper
        end

        coord_factor = @similarity.coord(coord, max_coord)
        if (coord_factor == 1.0)             # coord is no-op
          return sum_expl                    # eliminate wrapper
        else
          result = Explanation.new()
          result.description = "product of:"
          result << sum_expl
          result << Explanation.new(coord_factor, "coord(#{coord}/#{max_coord})")
          result.value = sum * coord_factor
          return result
        end
      end
    end #end BooleanWeight

    # Returns a new BooleanWeight for this query and +searcher+.
    def create_weight(searcher)
      return BooleanWeight.new(self, searcher)
    end

    # Rewrites this query against +reader+.
    #
    # A query with a single non-prohibited clause is replaced by that
    # clause's rewritten sub-query, carrying this query's boost. Otherwise
    # every clause is rewritten; if any of them changed, a copy of this query
    # holding the rewritten clauses is returned, else +self+.
    def rewrite(reader)
      if @clauses.size == 1                  # optimize 1-clause queries
        clause = @clauses[0]
        if not clause.prohibited?            # just return clause

          query = clause.query.rewrite(reader) # rewrite first

          if boost() != 1.0                  # incorporate boost
            if query == clause.query         # if rewrite was no-op
              query = query.clone            # then clone before boost
            end
            query.boost = boost() * query.boost()
          end

          return query
        end
      end

      # The copy is created lazily, only once a clause actually changes. It
      # is deliberately not named +clone+ so it does not shadow the method.
      rewritten = nil                        # recursively rewrite
      @clauses.each_with_index do |clause, i|
        sub_query = clause.query().rewrite(reader)
        if sub_query != clause.query()       # clause rewrote: must clone
          rewritten ||= clone()
          rewritten.clauses[i] = BooleanClause.new(sub_query, clause.occur)
        end
      end
      if (rewritten != nil)
        return rewritten                     # some clauses rewrote
      else
        return self                          # no clauses rewrote
      end
    end

    # Asks every clause's sub-query to add its terms to +terms+.
    def extract_terms(terms)
      @clauses.each do |clause|
        clause.query.extract_terms(terms)
      end
    end

    # Delegates to Query.merge_boolean_queries with +queries+.
    def combine(queries)
      return Query.merge_boolean_queries(queries)
    end

    # Returns a copy of this query with its own clause array. The array is
    # copied shallowly, so the clause objects themselves are shared.
    def clone()
      copy = super
      copy.clauses = @clauses.clone
      return copy
    end

    # Prints a user-readable version of this query.
    #
    # field:: passed through to each sub-query's to_s
    def to_s(field = nil)
      buffer = ""
      buffer << "(" if boost != 1.0

      @clauses.each_with_index do |clause, i|
        if clause.prohibited?
          buffer << "-"
        elsif clause.required?
          buffer << "+"
        end

        sub_query = clause.query
        if sub_query.instance_of? BooleanQuery # wrap sub-bools in parens
          buffer << "(#{clause.query.to_s(field)})"
        else
          buffer << clause.query.to_s(field)
        end

        if i != (@clauses.size - 1)          # separate clauses with a space
          buffer << " "
        end
      end

      buffer << ")^#{boost}" if boost() != 1.0

      return buffer
    end

    # Returns true iff +other+ is exactly a BooleanQuery (subclasses do not
    # count) with the same boost and equal clauses.
    def eql?(other)
      if not other.instance_of?(BooleanQuery)
        return false
      end
      return (boost() == other.boost() and @clauses == other.clauses)
    end
    alias :== :eql?

    # Returns a hash code value for this object, built from the same fields
    # that eql? compares.
    def hash()
      return boost().hash ^ @clauses.hash
    end
  end
end