ferret 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,49 @@
1
+ require 'ferret/search/similarity.rb'
2
+ require 'ferret/search/boolean_clause.rb'
3
+ require 'ferret/search/scorer.rb'
4
+ require 'ferret/search/score_doc.rb'
5
+ require 'ferret/search/score_doc_comparator.rb'
6
+ require 'ferret/search/weight.rb'
7
+ require 'ferret/search/query.rb'
8
+ require 'ferret/search/term_query.rb'
9
+ require 'ferret/search/term_scorer.rb'
10
+ require 'ferret/search/top_docs.rb'
11
+ require 'ferret/search/boolean_query.rb'
12
+ require 'ferret/search/conjunction_scorer.rb'
13
+ require 'ferret/search/disjunction_sum_scorer.rb'
14
+ require 'ferret/search/multi_term_query.rb'
15
+ require 'ferret/search/phrase_query.rb'
16
+ require 'ferret/search/multi_phrase_query.rb'
17
+ require 'ferret/search/prefix_query.rb'
18
+ require 'ferret/search/range_query.rb'
19
+ require 'ferret/search/filtered_term_enum.rb'
20
+ require 'ferret/search/wildcard_term_enum.rb'
21
+ require 'ferret/search/wildcard_query.rb'
22
+ require 'ferret/search/fuzzy_term_enum.rb'
23
+ require 'ferret/search/fuzzy_query.rb'
24
+ require 'ferret/search/phrase_positions.rb'
25
+ require 'ferret/search/phrase_scorer.rb'
26
+ require 'ferret/search/exact_phrase_scorer.rb'
27
+ require 'ferret/search/sloppy_phrase_scorer.rb'
28
+ require 'ferret/search/boolean_scorer.rb'
29
+ require 'ferret/search/explanation.rb'
30
+ require 'ferret/search/field_doc.rb'
31
+ require 'ferret/search/hit_collector.rb'
32
+ require 'ferret/search/hit_queue.rb'
33
+ require 'ferret/search/non_matching_scorer.rb'
34
+ require 'ferret/search/req_excl_scorer.rb'
35
+ require 'ferret/search/req_opt_sum_scorer.rb'
36
+ require 'ferret/search/score_doc.rb'
37
+ require 'ferret/search/score_doc_comparator.rb'
38
+ require 'ferret/search/sort_field.rb'
39
+ require 'ferret/search/sort.rb'
40
+ require 'ferret/search/field_cache.rb'
41
+ require 'ferret/search/field_sorted_hit_queue.rb'
42
+ require 'ferret/search/filter.rb'
43
+ require 'ferret/search/range_filter.rb'
44
+ require 'ferret/search/query_filter.rb'
45
+ require 'ferret/search/caching_wrapper_filter.rb'
46
+ require 'ferret/search/filtered_query.rb'
47
+ require 'ferret/search/match_all_docs_query.rb'
48
+ require 'ferret/search/spans.rb'
49
+ require 'ferret/search/index_searcher.rb'
@@ -0,0 +1,100 @@
1
+
2
+ module Ferret::Search
3
+
4
+ # A clause in a BooleanQuery.
5
+ class BooleanClause
6
+
7
# Operator attached to a clause of a boolean query. Three instances are
# defined as constants below: MUST, SHOULD and MUST_NOT.
class Occur < Ferret::Utils::Parameter

  # Returns the query-syntax prefix for this operator: "+" for MUST, "-"
  # for MUST_NOT and the empty string for anything else.
  def to_s
    if self == MUST
      "+"
    elsif self == MUST_NOT
      "-"
    else
      ""
    end
  end

  # Use this operator for terms that _must_ appear in the matching
  # documents.
  MUST = Occur.new("MUST")

  # Use this operator for terms that _should_ appear in the matching
  # documents. For a BooleanQuery with two +SHOULD+ subqueries, at
  # least one of the queries must appear in the matching documents.
  SHOULD = Occur.new("SHOULD")

  # Use this operator for terms that _must not_ appear in the matching
  # documents. Note that it is not possible to search for queries that
  # only consist of a +MUST_NOT+ query.
  MUST_NOT = Occur.new("MUST_NOT")
end
29
+
30
+ # The query whose matching documents are combined by the boolean query.
31
+ attr_accessor :query
32
+
33
+ # If true, documents which _do not_ match this sub-query will
34
+ # _not_ match the boolean query.
35
+ attr_writer :required
36
# Reader for the +@required+ flag.
def required?
  return @required
end
39
+
40
+ # If true, documents which _do_ match this sub-query will _not_
41
+ # match the boolean query.
42
+ attr_writer :prohibited
43
# Reader for the +@prohibited+ flag.
def prohibited?
  return @prohibited
end
46
+
47
+ # See BooleanClause::Occur for values for this attribute
48
+ attr_reader :occur
49
# Stores the new occur value, then lets set_fields re-derive the
# required/prohibited flags from it.
def occur=(occur)
  set_fields(@occur = occur)
end
53
+
54
# Constructs a BooleanClause for +query+. When +occur+ is omitted it
# defaults to Occur::SHOULD. The required/prohibited flags are derived from
# +occur+ by set_fields.
def initialize(query, occur = Occur::SHOULD)
  @query, @occur = query, occur
  set_fields(occur)
end
60
+
61
+
62
# Returns true iff +other+ is exactly a BooleanClause whose query and
# required/prohibited flags all equal this clause's.
def eql?(other)
  return false unless other.instance_of?(BooleanClause)

  @query == other.query &&
    @required == other.required? &&
    @prohibited == other.prohibited?
end
71
+ alias :== :eql?
72
+
73
# Returns a hash code value for this object: the query's hash with bit 1
# flipped when the clause is required and bit 2 flipped when it is
# prohibited.
def hash
  flags = 0
  flags |= 1 if @required
  flags |= 2 if @prohibited
  @query.hash ^ flags
end
77
+
78
+ # represent a boolean clause as a string
79
+ def to_s()
80
+ return @occur.to_s() + @query.to_s()
81
+ end
82
+
83
+ private
84
+
85
# Derives the +@required+ and +@prohibited+ flags from +occur+.
#
# occur::  one of Occur::MUST, Occur::SHOULD or Occur::MUST_NOT
# raises:: ArgumentError if +occur+ is none of the three operators
def set_fields(occur)
  if occur == Occur::MUST
    @required = true
    @prohibited = false
  elsif occur == Occur::SHOULD
    @required = false
    @prohibited = false
  elsif occur == Occur::MUST_NOT
    @required = false
    @prohibited = true
  else
    # Interpolate instead of concatenating: String#+ raises TypeError for
    # a non-String operand, which would mask the ArgumentError.
    raise ArgumentError, "Unknown operator #{occur}"
  end
end
99
+ end
100
+ end
@@ -0,0 +1,303 @@
1
+ module Ferret::Search
2
+ # A Query that matches documents matching boolean combinations of other
3
+ # queries, e.g. TermQuerys, PhraseQuerys or other BooleanQuerys.
4
+ class BooleanQuery < Query
5
+
6
+ # The maximum number of clauses permitted. Default value is 1024.
7
+ #
8
+ # TermQuery clauses are generated from for example prefix queries and
9
+ # fuzzy queries. Each TermQuery needs some buffer space during search,
10
+ # so this parameter indirectly controls the maximum buffer requirements
11
+ # for query search.
12
+ #
13
+ # When this parameter becomes a bottleneck for a Query one can use a
14
+ # Filter. For example instead of a RangeQuery one can use a RangeFilter.
15
+ #
16
+ # Attempts to add more than the permitted number of clauses cause
17
+ # TooManyClauses to be raised.
18
+ attr_accessor :max_clause_count
19
+ attr_accessor :clauses
20
+ DEFAULT_MAX_CLAUSE_COUNT = 1024
21
+
22
+ @@max_clause_count = DEFAULT_MAX_CLAUSE_COUNT
23
# Class-level reader for the maximum number of clauses, stored in the
# +@@max_clause_count+ class variable.
def self.max_clause_count
  @@max_clause_count
end
26
# Class-level writer for the maximum number of clauses; +mcc+ replaces the
# value held in the +@@max_clause_count+ class variable.
def self.max_clause_count=(mcc)
  @@max_clause_count = mcc
end
29
+
30
# Raised when an attempt is made to add more than #max_clause_count()
# clauses. This typically happens if a PrefixQuery, FuzzyQuery,
# WildcardQuery, or RangeQuery is expanded to many terms during search.
#
# Derives from StandardError (not Exception directly) so that a plain
# +rescue+ clause catches it; rescuing TooManyClauses or Exception
# explicitly keeps working as before.
class TooManyClauses < StandardError
end
35
+
36
# Constructs an empty boolean query (no clauses yet).
#
# Similarity#coord(int,int) may be disabled in scoring, as appropriate.
# For example, this score factor does not make sense for most
# automatically generated queries, like WildcardQuery and FuzzyQuery.
#
# coord_disabled:: disables Similarity#coord(int,int) in scoring.
def initialize(coord_disabled = false)
  super()
  @clauses = []
  @coord_disabled = coord_disabled
end
48
+
49
# Returns true iff Similarity#coord(int,int) is disabled in scoring for
# this query instance, ie. the value passed as +coord_disabled+ to
# #initialize.
def coord_disabled?
  @coord_disabled
end
55
+
56
# Returns the similarity to score this query with.
#
# When coord is enabled the object returned by the superclass is passed
# through untouched. When coord is disabled, a copy of it is returned whose
# +coord+ always yields 1.0.
#
# searcher:: forwarded unchanged to the superclass implementation
def similarity(searcher)
  sim = super
  return sim unless @coord_disabled

  # Patch a clone rather than the object itself: a singleton method defined
  # directly on the superclass's result would stay on that object for every
  # later user of it.
  # NOTE(review): presumably the superclass hands back the searcher's shared
  # similarity -- confirm against Query#similarity.
  sim = sim.clone
  def sim.coord(overlap, max_overlap)
    1.0
  end
  sim
end
67
+
68
# Wraps +query+ in a BooleanClause with the given +occur+ and adds it to
# this boolean query via #add_clause. Clauses may be:
#
# required::   which means that documents which _do not_ match this
#              sub-query will _not_ match the boolean query
# prohibited:: which means that documents which _do_ match this
#              sub-query will _not_ match the boolean query; or
# neither::    in which case matched documents are neither prohibited
#              from nor required to match the sub-query. However, a
#              document must match at least 1 sub-query to match the
#              boolean query.
#
# * For +required+ use add_query(query, BooleanClause::Occur::MUST)
# * For +prohibited+ use add_query(query, BooleanClause::Occur::MUST_NOT)
# * For +neither+ use add_query(query, BooleanClause::Occur::SHOULD)
#
# raises:: TooManyClauses if the new number of clauses exceeds the
#          maximum clause number #max_clause_count()
def add_query(query, occur)
  clause = BooleanClause.new(query, occur)
  add_clause(clause)
end
88
+
89
# Appends +clause+ to this boolean query's clause list.
#
# raises:: TooManyClauses if the query already holds the maximum number of
#          clauses. See #max_clause_count()
def add_clause(clause)
  raise TooManyClauses if @clauses.size >= @@max_clause_count

  @clauses << clause
end
99
+ alias :<< :add_clause
100
+
101
+ class BooleanWeight < Weight
102
+ attr_accessor :similarity
103
+ attr_accessor :weights
104
+ attr_reader :query
105
+
106
# Builds the weight for +query+: keeps the query, asks it for the
# similarity to use with +searcher+, then creates one sub-weight per
# clause, in clause order.
def initialize(query, searcher)
  @query = query
  @similarity = query.similarity(searcher)
  @weights = query.clauses.map do |clause|
    clause.query.create_weight(searcher)
  end
end
115
+
116
# The value of this weight, which is the boost of the wrapped query.
def value
  @query.boost
end
119
+
120
# Adds up sum_of_squared_weights of every sub-weight whose clause is not
# prohibited and multiplies the total by the square of the query's boost.
def sum_of_squared_weights
  total = 0
  @weights.zip(@query.clauses) do |weight, clause|
    next if clause.prohibited?
    total += weight.sum_of_squared_weights   # sum sub weights
  end

  # boost each sub-weight
  total * (@query.boost * @query.boost)
end
133
+
134
+
135
# Multiplies +norm+ by the query's boost and passes the result on to every
# sub-weight whose clause is not prohibited.
def normalize(norm)
  scaled = norm * @query.boost
  @weights.each_with_index do |weight, idx|
    weight.normalize(scaled) unless @query.clauses[idx].prohibited?
  end
end
144
+
145
# Builds a BooleanScorer from the sub-weights' scorers, registering each
# one together with its clause's occur value.
#
# returns:: An alternative Scorer that uses and provides skip_to(),
#           and scores documents in document number order; or nil when a
#           required clause has no scorer for +reader+.
def scorer(reader)
  combined = BooleanScorer.new(@similarity)

  @weights.each_with_index do |weight, idx|
    clause = @query.clauses[idx]
    sub_scorer = weight.scorer(reader)
    if sub_scorer.nil?
      return nil if clause.required?
    else
      combined.add_scorer(sub_scorer, clause.occur)
    end
  end

  combined
end
162
+
163
# Explains the score of +doc+ by combining the sub-weights' explanations.
#
# A matching prohibited clause or a non-matching required clause ends the
# explanation early with a 0.0 "match prohibited" / "match required"
# result. Otherwise the matching clauses are summed and, unless the coord
# factor is exactly 1.0, wrapped in a "product of:" explanation that also
# lists the coord factor.
def explain(reader, doc)
  sum_expl = Explanation.new
  sum_expl.description = "sum of:"
  matched = 0
  possible = 0
  total = 0.0

  @weights.each_with_index do |weight, idx|
    clause = @query.clauses[idx]
    sub_expl = weight.explain(reader, doc)
    possible += 1 unless clause.prohibited?
    if sub_expl.value > 0
      return Explanation.new(0.0, "match prohibited") if clause.prohibited?

      sum_expl << sub_expl
      total += sub_expl.value
      matched += 1
    elsif clause.required?
      return Explanation.new(0.0, "match required")
    end
  end
  sum_expl.value = total

  # only one clause matched: eliminate the "sum of:" wrapper
  sum_expl = sum_expl.details[0] if matched == 1

  coord_factor = @similarity.coord(matched, possible)
  # coord is a no-op: eliminate the "product of:" wrapper
  return sum_expl if coord_factor == 1.0

  result = Explanation.new
  result.description = "product of:"
  result << sum_expl
  result << Explanation.new(coord_factor, "coord(#{matched}/#{possible})")
  result.value = total * coord_factor
  result
end
205
+ end #end BooleanWeight
206
+
207
# Creates the BooleanWeight for this query and +searcher+.
def create_weight(searcher)
  BooleanWeight.new(self, searcher)
end
210
+
211
# Rewrites this query for +reader+.
#
# A query holding a single, non-prohibited clause is replaced by that
# clause's rewritten sub-query, with this query's boost multiplied in.
# Otherwise every sub-query is rewritten; if any of them changed, a clone
# of this query carrying the rewritten clauses is returned, else +self+.
def rewrite(reader)
  if @clauses.size == 1                       # optimize 1-clause queries
    only = @clauses[0]
    unless only.prohibited?                   # just return clause
      rewritten = only.query.rewrite(reader)  # rewrite first

      if boost != 1.0                         # incorporate boost
        # if the rewrite was a no-op, clone before changing the boost
        rewritten = rewritten.clone if rewritten == only.query
        rewritten.boost = boost * rewritten.boost
      end

      return rewritten
    end
  end

  copy = nil                                  # recursively rewrite
  @clauses.each_with_index do |clause, idx|
    rewritten = clause.query.rewrite(reader)
    if rewritten != clause.query              # clause rewrote: must clone
      copy ||= clone
      copy.clauses[idx] = BooleanClause.new(rewritten, clause.occur)
    end
  end

  # the copy exists only if some clause rewrote
  copy || self
end
243
+
244
# Asks the query of every clause, in order, to extract its terms into
# +terms+.
def extract_terms(terms)
  @clauses.each { |clause| clause.query.extract_terms(terms) }
end
249
+
250
# Combines +queries+ by delegating to Query.merge_boolean_queries.
def combine(queries)
  Query.merge_boolean_queries(queries)
end
253
+
254
# Returns a copy of this query that has its own clause array, so that
# replacing a clause on the copy leaves this query's array untouched. The
# clause objects themselves are not copied.
def clone
  copy = super
  copy.clauses = @clauses.clone
  copy
end
259
+
260
+ # Prints a user-readable version of this query.
261
+ def to_s(field = nil)
262
+ buffer = ""
263
+ buffer << "(" if boost != 1.0
264
+
265
+ @clauses.each_with_index do |clause, i|
266
+ if clause.prohibited?
267
+ buffer << "-"
268
+ elsif clause.required?
269
+ buffer << "+"
270
+ end
271
+
272
+ sub_query = clause.query
273
+ if sub_query.instance_of? BooleanQuery # wrap sub-bools in parens
274
+ buffer << "(#{clause.query.to_s(field)})"
275
+ else
276
+ buffer << clause.query.to_s(field)
277
+ end
278
+
279
+ if i != (@clauses.size - 1)
280
+ buffer << " "
281
+ end
282
+ end
283
+
284
+ buffer << ")^#{boost}" if boost() != 1.0
285
+
286
+ return buffer
287
+ end
288
+
289
# Returns true iff +other+ is exactly a BooleanQuery with the same boost
# and equal clauses.
def eql?(other)
  other.instance_of?(BooleanQuery) &&
    boost == other.boost &&
    @clauses == other.clauses
end
296
+ alias :== :eql?
297
+
298
# Returns a hash code value for this object, combining the hash of the
# boost with the hash of the clause list.
def hash
  boost_hash = boost.hash
  clauses_hash = @clauses.hash
  boost_hash ^ clauses_hash
end
302
+ end
303
+ end