ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,294 +0,0 @@
module Ferret::Search
  # Scorer for boolean queries, assembled out of other scorers.
  #
  # Uses ConjunctionScorer, DisjunctionSumScorer, ReqOptSumScorer and
  # ReqExclScorer (and NonMatchingScorer when only prohibited scorers exist).
  #
  # Implements skip_to(), and has no limitations on the numbers of added
  # scorers.
  class BooleanScorer < Scorer
    attr_reader :required_scorers, :coordinator

    # Counts how many non-prohibited scorers matched the current document
    # and maps that count to a coordination factor.
    class Coordinator
      attr_accessor :max_coord, :nr_matchers

      # similarity:: object whose #coord(overlap, max_overlap) supplies the
      #              coordination factors.
      def initialize(similarity)
        @max_coord = 0 # to be increased for each non prohibited scorer
        @coord_factors = nil
        @similarity = similarity
      end

      # Precomputes the coordination factor for every possible number of
      # matchers (0..max_coord). Use after all scorers have been added.
      def init()
        @coord_factors = Array.new(@max_coord + 1) do |i|
          @similarity.coord(i, @max_coord)
        end
      end

      # Resets the matcher count; called before each document is scored.
      def init_doc()
        @nr_matchers = 0
      end

      # Returns the precomputed coordination factor for the current matcher
      # count. #init must have been called first.
      def coord_factor()
        return @coord_factors[@nr_matchers]
      end
    end

    # similarity:: passed on to Scorer and used by the Coordinator.
    #
    # All scoring is delegated to @counting_sum_scorer (built lazily),
    # except for computing and using the coordination factor.
    def initialize(similarity)
      super(similarity)
      @required_scorers = []
      @optional_scorers = []
      @prohibited_scorers = []
      @counting_sum_scorer = nil
      @coordinator = Coordinator.new(similarity)
    end

    # Registers a sub-scorer.
    #
    # scorer:: the scorer to add
    # occur::  one of BooleanClause::Occur::MUST, SHOULD or MUST_NOT. Any
    #          other value only bumps the coordinator's max_coord.
    def add_scorer(scorer, occur)
      unless occur == BooleanClause::Occur::MUST_NOT
        @coordinator.max_coord += 1
      end

      # NOTE: "when X then" rather than the 1.8-only "when X:" form, which is
      # a syntax error from Ruby 1.9 on.
      case occur
      when BooleanClause::Occur::MUST     then @required_scorers << scorer
      when BooleanClause::Occur::SHOULD   then @optional_scorers << scorer
      when BooleanClause::Occur::MUST_NOT then @prohibited_scorers << scorer
      end
    end

    # Initialize the match counting scorer that sums all the scores.
    #
    # When "counting" is used in a name it means counting the number of
    # matching scorers. When "sum" is used in a name it means score value
    # summing over the matching scorers.
    def init_counting_sum_scorer()
      @coordinator.init()
      @counting_sum_scorer = make_counting_sum_scorer()
    end

    # Wraps one scorer and counts it as a single match every time it is
    # scored. Everything else is delegated to the wrapped scorer.
    class SingleMatchScorer < Scorer
      def initialize(parent_scorer, scorer)
        super(scorer.similarity)
        @scorer = scorer
        @parent_scorer = parent_scorer
      end

      # Side effect: increments the parent's matcher count.
      def score()
        @parent_scorer.coordinator.nr_matchers += 1
        return @scorer.score
      end

      def doc()
        return @scorer.doc
      end

      def next?
        return @scorer.next?
      end

      def skip_to(doc_num)
        return @scorer.skip_to(doc_num)
      end

      def explain(doc_num)
        return @scorer.explain(doc_num)
      end
    end

    # DisjunctionSumScorer that reports its number of matching sub-scorers
    # to the parent's coordinator whenever it is scored.
    class CountingDisjunctionSumScorer < DisjunctionSumScorer
      def initialize(parent_scorer, scorers)
        super(scorers)
        @parent_scorer = parent_scorer
      end

      # Side effect: adds @nr_matchers (maintained by the superclass) to the
      # parent's matcher count.
      def score
        @parent_scorer.coordinator.nr_matchers += @nr_matchers
        return super
      end
    end

    # Builds a disjunction in which each scorer from the list is counted as
    # a single matcher.
    def counting_disjunction_sum_scorer(scorers)
      return CountingDisjunctionSumScorer.new(self, scorers)
    end

    # ConjunctionScorer that credits all required scorers to the parent's
    # coordinator, at most once per document.
    class CountingConjunctionScorer < ConjunctionScorer
      def initialize(parent_scorer, similarity)
        super(similarity)
        @parent_scorer = parent_scorer
        @required_num_matchers = parent_scorer.required_scorers.size
        @last_scored_doc = -1
      end

      def score
        # Only count the matchers the first time a given document is scored.
        if @parent_scorer.doc() > @last_scored_doc
          @last_scored_doc = @parent_scorer.doc()
          @parent_scorer.coordinator.nr_matchers += @required_num_matchers
        end

        return super
      end
    end

    # Builds a conjunction over +required_scorers+ in which each scorer from
    # the list is counted as a single matcher.
    #
    # The scorers are taken from the argument. The previous version ignored
    # it and always iterated @required_scorers; the only caller in this class
    # passes @required_scorers, so behaviour is unchanged for it.
    def counting_conjunction_sum_scorer(required_scorers)
      ccs = CountingConjunctionScorer.new(self, Similarity.default)
      required_scorers.each do |scorer|
        ccs << scorer
      end
      return ccs
    end

    # Returns the scorer to be used for match counting and score summing.
    # Uses required_scorers, optional_scorers and prohibited_scorers.
    # Each scorer is counted as a single matcher.
    def make_counting_sum_scorer()
      if @required_scorers.empty?
        if @optional_scorers.empty?
          return NonMatchingScorer.new # only prohibited scorers
        elsif @optional_scorers.size == 1
          # the only optional scorer is required
          required = SingleMatchScorer.new(self, @optional_scorers[0])
        else
          # more than 1 optional scorer: at least one of them is required
          required = counting_disjunction_sum_scorer(@optional_scorers)
        end
        return make_counting_sum_scorer2(required, []) # no optional scorers left
      elsif @required_scorers.size == 1 # 1 required
        return make_counting_sum_scorer2(
                 SingleMatchScorer.new(self, @required_scorers[0]),
                 @optional_scorers)
      else # more required scorers
        return make_counting_sum_scorer2(
                 counting_conjunction_sum_scorer(@required_scorers),
                 @optional_scorers)
      end
    end

    # Returns the scorer to be used for match counting and score summing.
    # Uses the arguments and prohibited_scorers.
    #
    # required_counting_sum_scorer:: A required scorer already built.
    # optional_scorers::             A list of optional scorers, possibly
    #                                empty.
    def make_counting_sum_scorer2(required_counting_sum_scorer, optional_scorers)
      if optional_scorers.empty?
        return exclude_prohibited(required_counting_sum_scorer)
      elsif optional_scorers.size == 1
        return make_counting_sum_scorer3(
                 required_counting_sum_scorer,
                 SingleMatchScorer.new(self, optional_scorers[0]))
      else # more optional
        return make_counting_sum_scorer3(
                 required_counting_sum_scorer,
                 counting_disjunction_sum_scorer(optional_scorers))
      end
    end

    # Returns the scorer to be used for match counting and score summing.
    # Uses the arguments and prohibited_scorers.
    #
    # required_counting_sum_scorer:: A required scorer already built.
    # optional_counting_sum_scorer:: An optional scorer already built.
    def make_counting_sum_scorer3(required_counting_sum_scorer,
                                  optional_counting_sum_scorer)
      return ReqOptSumScorer.new(
               exclude_prohibited(required_counting_sum_scorer),
               optional_counting_sum_scorer)
    end

    # Expert: Iterates over all matching documents, yielding the document
    # number and the score. Builds the counting sum scorer on first use.
    # The return value carries no meaning.
    def each_hit() # :yields: doc, score
      if @counting_sum_scorer.nil?
        init_counting_sum_scorer()
      end
      while @counting_sum_scorer.next?
        yield(@counting_sum_scorer.doc, score())
      end
    end

    # Expert: Iterates over matching documents in a range.
    #
    # NOTE: #next? needs to be called first; otherwise the counting sum
    # scorer has not been built yet and this method fails on nil.
    #
    # max::     Do not score documents past this. Default will search all
    #           documents available.
    # returns:: true if more matching documents may remain.
    def each_hit_up_to(max = MAX_DOCS) # :yields: doc, score
      doc_num = @counting_sum_scorer.doc()
      while doc_num < max
        yield(doc_num, score())
        return false unless @counting_sum_scorer.next?
        doc_num = @counting_sum_scorer.doc()
      end
      return true
    end

    # Document number of the current match.
    def doc()
      return @counting_sum_scorer.doc
    end

    # Advances to the next matching document, building the counting sum
    # scorer on first use.
    def next?
      if @counting_sum_scorer.nil?
        init_counting_sum_scorer()
      end
      return @counting_sum_scorer.next?
    end

    # Score of the current document: the summed sub-scores multiplied by the
    # coordination factor. Scoring the delegate updates the coordinator's
    # matcher count as a side effect, so the count is reset first.
    def score()
      @coordinator.init_doc()
      sum = @counting_sum_scorer.score()
      return sum * @coordinator.coord_factor()
    end

    # Skips to the first match beyond the current whose document number is
    # greater than or equal to a given target.
    #
    # When this method is used the #explain(int) method should not be used.
    #
    # target::  The target document number.
    # returns:: true iff there is such a match.
    def skip_to(target)
      if @counting_sum_scorer.nil?
        init_counting_sum_scorer()
      end
      return @counting_sum_scorer.skip_to(target)
    end

    # TODO: Implement an explanation of the coordination factor.
    #
    # doc::    The document number for the explanation.
    # raises:: NotImplementedError
    def explain(doc)
      raise NotImplementedError
      # How to explain the coordination factor?
      #init_counting_sum_scorer()
      #return @counting_sum_scorer.explain(doc); # misses coord factor.
    end

    private

    # Wraps +scorer+ so that documents matched by any prohibited scorer are
    # excluded. Returns +scorer+ itself when there are no prohibited scorers.
    def exclude_prohibited(scorer)
      if @prohibited_scorers.empty?
        return scorer
      elsif @prohibited_scorers.size == 1
        return ReqExclScorer.new(scorer, @prohibited_scorers[0])
      else # more than 1 prohibited
        return ReqExclScorer.new(
                 scorer,
                 DisjunctionSumScorer.new(@prohibited_scorers))
      end
    end
  end
end
@@ -1,40 +0,0 @@
module Ferret::Search
  require 'monitor'

  # Wraps another filter's result and caches it. The caching
  # behavior is like QueryFilter. The purpose is to allow
  # filters to simply filter, and then wrap with this class to add
  # caching, keeping the two concerns decoupled yet composable.
  class CachingWrapperFilter < Filter
    # filter:: Filter to cache results of
    def initialize(filter)
      @filter = filter
      # Created eagerly so that every caller of #bits synchronizes on the
      # same cache object. Creating it lazily inside #bits (outside any lock)
      # let two threads each build their own cache.
      @cache = Ferret::Utils::WeakKeyHash.new
    end

    # Returns the wrapped filter's bits for +reader+, computing them at most
    # once per cached reader (concurrent first calls may both compute; the
    # last writer wins).
    #
    # reader:: the index reader the bits are computed for; used as cache key
    def bits(reader)
      # Fallback for instances that never ran #initialize above (e.g. a
      # subclass overriding it without calling super).
      @cache ||= Ferret::Utils::WeakKeyHash.new

      @cache.synchronize do # check cache
        cached = @cache[reader]
        return cached if cached
      end

      # Computed outside the lock so a slow filter does not block readers of
      # other cache entries.
      result = @filter.bits(reader)

      @cache.synchronize do # update cache
        @cache[reader] = result
      end

      return result
    end

    def to_s()
      return "CachingWrapperFilter(#{@filter})"
    end
  end
end
@@ -1,99 +0,0 @@
require 'set'
module Ferret::Search
  # Scorer for conjunctions: a document matches only when every added
  # sub-scorer matches it.
  class ConjunctionScorer < Scorer

    def initialize(similarity)
      super
      @more = true
      @first_time = true
      @scorers = []
    end

    # Appends a required sub-scorer. Also available as #<<.
    def add(scorer)
      @scorers.push(scorer)
    end
    alias :<< :add

    # The sub-scorer currently at the front of the list.
    def first
      @scorers[0]
    end

    # The sub-scorer currently at the back of the list.
    def last
      @scorers[-1]
    end

    # Document number of the front sub-scorer.
    def doc
      first.doc
    end

    # Moves to the next document on which all sub-scorers agree.
    # The first call positions every sub-scorer on its first entry;
    # later calls advance the last sub-scorer to trigger further scanning.
    def next?
      if @first_time
        init(true)
      elsif @more
        @more = last.next?
      end
      do_next
    end

    # Leapfrogs the sub-scorers until the front and back ones sit on the
    # same document or one of them runs out. Returns @more.
    def do_next
      loop do
        break unless @more && first.doc < last.doc
        # bring the lagging front scorer up to the back scorer's document ...
        @more = first.skip_to(last.doc)
        # ... and rotate it to the back of the list
        @scorers.push(@scorers.shift)
      end
      @more
    end

    # Skips every sub-scorer to +target+ (stopping at the first one that is
    # exhausted), re-sorts them by document and then looks for agreement.
    def skip_to(target)
      init(false) if @first_time

      @scorers.each do |sub|
        break unless @more
        @more = sub.skip_to(target)
      end

      sort_scorers if @more

      do_next
    end

    # Sum of the sub-scorers' scores for the current document, multiplied by
    # the coord factor computed in #init.
    def score
      total = @scorers.inject(0.0) { |acc, sub| acc + sub.score }
      total * @coord
    end

    # One-time setup: computes the coord factor and, when +init_scorers+ is
    # true, moves each sub-scorer to its first entry and sorts them.
    def init(init_scorers)
      count = @scorers.size
      @coord = similarity.coord(count, count)
      @more = count > 0

      if init_scorers
        @scorers.each do |sub|
          break unless @more
          @more = sub.next?
        end
        sort_scorers if @more
      end

      @first_time = false
    end

    # Orders the sub-scorers in place by their current document number.
    def sort_scorers
      @scorers.sort! { |left, right| left.doc <=> right.doc }
    end

    # Not supported; always raises NotImplementedError.
    def explain(doc)
      raise NotImplementedError
    end
  end
end
@@ -1,205 +0,0 @@
module Ferret::Search
  # A Scorer for OR like queries, counterpart of Lucene's +ConjunctionScorer+.
  # This Scorer implements Scorer#skip_to(int) and uses skip_to() on the given
  # Scorers.
  class DisjunctionSumScorer < Scorer
    # the sub-scorers
    attr_accessor :sub_scorers

    # Construct a +DisjunctionSumScorer+.
    #
    # sub_scorers::         A collection of at least two subscorers.
    #
    # minimum_nr_matchers:: The positive minimum number of subscorers that
    #                       should match to match this query.
    #
    #                       When +minimum_nr_matchers+ is bigger than the
    #                       number of +sub_scorers+, no matches will be
    #                       produced.
    #
    #                       When +minimum_nr_matchers+ equals the number of
    #                       +sub_scorers+, it is more efficient to use
    #                       +ConjunctionScorer+.
    #
    # raises:: ArgumentError if +minimum_nr_matchers+ is not positive or
    #          fewer than two sub-scorers are given.
    def initialize(sub_scorers, minimum_nr_matchers = 1)
      super(nil)

      # The number of subscorers.
      @nr_scorers = sub_scorers.size

      # The document number of the current match.
      @current_doc = -1
      # The summed score of the current match (was misspelt @curret_score,
      # which left @current_score undefined until the first match).
      @current_score = nil
      # The number of subscorers that provide the current match.
      @nr_matchers = -1

      if minimum_nr_matchers <= 0
        raise ArgumentError, "Minimum nr of matchers must be positive"
      end
      if @nr_scorers <= 1
        raise ArgumentError, "There must be at least 2 sub_scorers"
      end

      @minimum_nr_matchers = minimum_nr_matchers
      @sub_scorers = sub_scorers

      # The @scorer_queue contains all subscorers ordered by their current
      # doc, with the minimum at the top.
      #
      # The @scorer_queue is initialized the first time next? or skip_to() is
      # called.
      #
      # An exhausted scorer is immediately removed from the @scorer_queue.
      #
      # If less than the @minimum_nr_matchers scorers remain in the
      # @scorer_queue next? and skip_to() return false.
      #
      # After each call to next? or skip_to()
      # +@current_score+ is the total score of the current matching doc,
      # +@nr_matchers+ is the number of matching scorers,
      # and all scorers are after the matching doc, or are exhausted.
      @scorer_queue = nil
    end

    # Called the first time next? or skip_to() is called to
    # initialize +@scorer_queue+. Sub-scorers that have no first document
    # are left out of the queue.
    def init_scorer_queue()
      @scorer_queue = ScorerQueue.new(@nr_scorers)
      @sub_scorers.each do |sub_scorer|
        if sub_scorer.next? # doc() method will be used in @scorer_queue.
          @scorer_queue.insert(sub_scorer)
        end
      end
    end

    # A +PriorityQueue+ that orders by Scorer#doc().
    class ScorerQueue < Ferret::Utils::PriorityQueue
      def less_than(scorer1, scorer2)
        return scorer1.doc < scorer2.doc
      end
    end

    # Advances to the next matching document.
    #
    # returns:: true iff there is a match.
    def next?
      if @scorer_queue.nil?
        init_scorer_queue()
      end

      if @scorer_queue.size < @minimum_nr_matchers
        return false
      else
        return advance_after_current()
      end
    end

    # Advance all subscorers after the current document determined by the
    # top of the +@scorer_queue+.
    # Repeat until at least the minimum number of subscorers match on the same
    # document and all subscorers are after that document or are exhausted.
    #
    # On entry the +@scorer_queue+ has at least +@minimum_nr_matchers+
    # available. At least the scorer with the minimum document number will be
    # advanced.
    #
    # returns:: true iff there is a match.
    #
    # In case there is a match, +@current_doc+, +@current_score+,
    # and +@nr_matchers+ describe the match.
    #
    # TODO Investigate whether it is possible to use skip_to() when
    # the minimum number of matchers is bigger than one, ie. begin and use the
    # character of ConjunctionScorer for the minimum number of matchers.
    def advance_after_current()
      loop do # repeat until minimum nr of matchers
        top = @scorer_queue.top
        @current_doc = top.doc
        @current_score = top.score
        @nr_matchers = 1

        loop do # until all subscorers are after @current_doc
          if top.next?
            @scorer_queue.adjust_top()
          else
            @scorer_queue.pop()
            if @scorer_queue.size < (@minimum_nr_matchers - @nr_matchers)
              # Not enough subscorers left for a match on this document,
              # and also no more chance of any further match.
              return false
            end
            # nothing more to advance, check for last match.
            break if @scorer_queue.size == 0
          end

          top = @scorer_queue.top
          # All remaining subscorers are after @current_doc.
          break if top.doc != @current_doc

          @current_score += top.score
          @nr_matchers += 1
        end

        if @nr_matchers >= @minimum_nr_matchers
          return true
        elsif @scorer_queue.size < @minimum_nr_matchers
          return false
        end
      end
    end

    # Returns the score of the current document matching the query.
    # Initially invalid, until #next? is called the first time.
    def score()
      return @current_score
    end

    # Returns the document number of the current document matching the query.
    # Initially invalid, until #next? is called the first time.
    def doc()
      return @current_doc
    end

    # Returns the number of subscorers matching the current document.
    # Initially invalid, until #next? is called the first time.
    def number_of_matchers()
      return @nr_matchers
    end

    # Skips to the first match beyond the current whose document number is
    # greater than or equal to a given target.
    #
    # When this method is used the #explain(int) method should not be used.
    #
    # The implementation uses the skip_to() method on the subscorers.
    #
    # target::  The target document number. A target at or before the current
    #           document is bumped to the document after the current one.
    # returns:: true iff there is such a match.
    def skip_to(target)
      if @scorer_queue.nil?
        init_scorer_queue()
      end
      if @scorer_queue.size < @minimum_nr_matchers
        return false
      end
      if target <= @current_doc
        target = @current_doc + 1
      end

      loop do
        top = @scorer_queue.top
        if top.doc >= target
          return advance_after_current()
        elsif top.skip_to(target)
          @scorer_queue.adjust_top()
        else
          # The top scorer is exhausted; drop it from the queue.
          @scorer_queue.pop()
          if @scorer_queue.size < @minimum_nr_matchers
            return false
          end
        end
      end
    end

    # Gives an explanation for the score of a given document.
    # TODO Show the resulting score. See BooleanScorer.explain() on how to do
    # this.
    def explain(doc)
      e = Explanation.new()
      # Interpolation instead of String#+, which raised TypeError because
      # @minimum_nr_matchers is a number.
      e.description = "At least #{@minimum_nr_matchers} of"
      @sub_scorers.each do |sub_scorer|
        e.details << sub_scorer.explain(doc)
      end
      return e
    end
  end
end