ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,139 +0,0 @@
1
- module Ferret::Search
2
- # A Query that matches documents within an exclusive range. A RangeQuery
3
- # is built by QueryParser for input like +[010 120]+.
4
- class RangeQuery < Query
5
- include Ferret::Index
6
-
7
- attr_reader :lower_term, :upper_term
8
-
9
- # Constructs a query selecting all terms greater than
10
- # +lower_term+ but less than +upper_term+.
11
- # There must be at least one term and either term may be nil,
12
- # in which case there is no bound on that side, but if there are
13
- # two terms, both terms *must* be for the same field.
14
- #
15
- # field:: The field this range applies to
16
- # lower_term:: The lower bound on this range
17
- # upper_term:: The upper bound on this range
18
- # include_lower:: Does this range include the lower bound?
19
- # include_upper:: Does this range include the upper bound?
20
- def initialize(field, lower_term, upper_term, include_lower, include_upper)
21
- super()
22
- @field = field
23
- @lower_term = lower_term
24
- @upper_term = upper_term
25
- @include_lower = include_lower
26
- @include_upper = include_upper
27
-
28
- if (lower_term.nil? and upper_term.nil?)
29
- raise ArgumentError, "At least one value must be non-nil"
30
- end
31
- if (include_lower and lower_term.nil?)
32
- raise ArgumentError, "The lower bound must be non-nil to be inclusive"
33
- end
34
- if (include_upper and upper_term.nil?)
35
- raise ArgumentError, "The upper bound must be non-nil to be inclusive"
36
- end
37
- if (upper_term and lower_term and upper_term < lower_term)
38
- raise ArgumentError, "The lower bound must less than the upper bound"
39
- end
40
- end
41
-
42
- # Constructs a query for field +field+ matching less than or equal to
43
- # +upper_term+.
44
- def RangeQuery.new_less(field, upper_term, include_upper = true)
45
- return RangeQuery.new(field, nil, upper_term, false, include_upper)
46
- end
47
-
48
- # Constructs a query for field +field+ matching greater than or equal
49
- # to +lower_term+.
50
- def RangeQuery.new_more(field, lower_term, include_lower = true)
51
- return RangeQuery.new(field, lower_term, nil, include_lower, false)
52
- end
53
-
54
- def rewrite(reader)
55
- bq = BooleanQuery.new(true)
56
- term_enum = reader.terms_from(Term.new(@field, @lower_term||""))
57
-
58
- begin
59
- check_lower = !@include_lower
60
- test_field = field()
61
- begin
62
- term = term_enum.term
63
-
64
- break if term.nil? or term.field != @field
65
- if (!check_lower or @lower_term.nil? or term.text > @lower_term)
66
- check_lower = false
67
- if @upper_term
68
- compare = @upper_term <=> term.text
69
-
70
- # if beyond the upper term, or is exclusive and
71
- # this is equal to the upper term, break out
72
- if ((compare < 0) or (not @include_upper and compare == 0))
73
- break
74
- end
75
- end
76
- tq = TermQuery.new(term) # found a match
77
- tq.boost = boost() # set the boost
78
- bq.add_query(tq, BooleanClause::Occur::SHOULD) # add to query
79
- end
80
- end while term_enum.next?
81
- ensure
82
- term_enum.close()
83
- end
84
- return bq
85
- end
86
-
87
- # Returns the field name for this query
88
- attr_reader :field, :lower_term, :upper_term, :include_lower, :include_upper
89
-
90
- # Prints a user-readable version of this query.
91
- def to_s(f=nil)
92
- buffer = ""
93
- buffer << "#{@field}:" if field() != f
94
-
95
- if @lower_term
96
- buffer << (@include_lower ? "[" : "{")
97
- buffer << @lower_term
98
- else
99
- buffer << "<"
100
- end
101
-
102
- buffer << " " if @upper_term and @lower_term
103
-
104
- if @upper_term
105
- buffer << @upper_term
106
- buffer << (@include_upper ? "]" : "}")
107
- else
108
- buffer << ">"
109
- end
110
-
111
- if boost() != 1.0
112
- buffer << "^#{boost()}"
113
- end
114
- return buffer
115
- end
116
-
117
- # Returns true iff +o+ is equal to this.
118
- def eql?(o)
119
- return ((o.instance_of?(RangeQuery)) and
120
- (boost() == o.boost()) and
121
- (@include_upper == o.include_upper) and
122
- (@include_lower == o.include_lower) and
123
- (@upper_term == o.upper_term) and
124
- (@lower_term == o.lower_term) and
125
- (@field == o.field))
126
- end
127
- alias :== :eql?
128
-
129
- # Returns a hash code value for this object.
130
- def hash()
131
- return (boost().hash ^
132
- @field.hash ^
133
- @lower_term.hash ^
134
- @upper_term.hash ^
135
- @include_lower.hash ^
136
- @include_upper.hash)
137
- end
138
- end
139
- end
@@ -1,125 +0,0 @@
1
- module Ferret::Search
2
- # A Scorer for queries with a required subscorer and an excluding (prohibited)
3
- # subscorer.
4
- #
5
- # This +Scorer+ implements Scorer#skip_to(int), and it uses the skip_to() on
6
- # the given scorers.
7
- class ReqExclScorer < Scorer
8
- # Construct a +ReqExclScorer+.
9
- # req_scorer:: The scorer that must match, except where
10
- # excl_scorer:: indicates exclusion.
11
- def initialize(req_scorer, excl_scorer)
12
- super(nil) # No similarity used.
13
- @req_scorer = req_scorer
14
- @excl_scorer = excl_scorer
15
-
16
- @first_time = true
17
- end
18
-
19
-
20
- def next?
21
- if @first_time
22
- if not @excl_scorer.next?
23
- @excl_scorer = nil # exhausted at start
24
- end
25
- @first_time = false
26
- end
27
- if @req_scorer == nil
28
- return false
29
- end
30
- if not @req_scorer.next?
31
- @req_scorer = nil; # exhausted, nothing left
32
- return false
33
- end
34
- if @excl_scorer == nil
35
- return true # @req_scorer.next? already returned true
36
- end
37
- return to_non_excluded()
38
- end
39
-
40
- # Advance to non excluded doc.
41
- # On entry:
42
- #
43
- # * @req_scorer != nil
44
- # * @excl_scorer != nil
45
- # * @req_scorer was advanced once via next? or skip_to() and
46
- # @req_scorer.doc() may still be excluded.
47
- #
48
- # Advances @req_scorer a non excluded required doc, if any.
49
- #
50
- # returns:: true iff there is a non excluded required doc.
51
- def to_non_excluded()
52
- excl_doc = @excl_scorer.doc
53
- begin
54
- req_doc = @req_scorer.doc # may be excluded
55
- if (req_doc < excl_doc)
56
- return true # @req_scorer advanced to before @excl_scorer, ie. not excluded
57
- elsif (req_doc > excl_doc)
58
- unless @excl_scorer.skip_to(req_doc)
59
- @excl_scorer = nil # exhausted, no more exclusions
60
- return true
61
- end
62
- excl_doc = @excl_scorer.doc
63
- if excl_doc > req_doc
64
- return true; # not excluded
65
- end
66
- end
67
- end while @req_scorer.next?
68
- @req_scorer = nil; # exhausted, nothing left
69
- return false
70
- end
71
-
72
- # @req_scorer may be nil when next? or skip_to() already return false so
73
- # only call when you know that a doc exists
74
- def doc()
75
- return @req_scorer.doc
76
- end
77
-
78
- # Returns the score of the current document matching the query.
79
- #
80
- # Initially invalid, until #next? is called the first time.
81
- #
82
- # returns:: The score of the required scorer.
83
- def score()
84
- return @req_scorer.score()
85
- end
86
-
87
- # Skips to the first match beyond the current whose document number is
88
- # greater than or equal to a given target.
89
- #
90
- # When this method is used the #explain(int) method should not be used.
91
- #
92
- # target:: The target document number.
93
- # returns:: true iff there is such a match.
94
- def skip_to(target)
95
- if (@first_time)
96
- @first_time = false
97
- if (! @excl_scorer.skip_to(target))
98
- @excl_scorer = nil; # exhausted
99
- end
100
- end
101
- if (@req_scorer == nil)
102
- return false
103
- end
104
- if (@excl_scorer == nil)
105
- return @req_scorer.skip_to(target)
106
- end
107
- if (! @req_scorer.skip_to(target))
108
- @req_scorer = nil
109
- return false
110
- end
111
- return to_non_excluded()
112
- end
113
-
114
- def explain(doc)
115
- e = Explanation.new()
116
- if @excl_scorer.skip_to(doc) and @excl_scorer.doc == doc
117
- e.description = "excluded"
118
- else
119
- e.description = "not excluded"
120
- e.details << @req_scorer.explain(doc)
121
- end
122
- return e
123
- end
124
- end
125
- end
@@ -1,70 +0,0 @@
1
- module Ferret::Search
2
- # A Scorer for queries with a required part and an optional part.
3
- # Delays skip_to() on the optional part until a score() is needed.
4
- #
5
- # This +Scorer+ implements Scorer#skip_to(int).
6
- class ReqOptSumScorer < Scorer
7
- # The scorers passed from the constructor.
8
- # These are set to nil as soon as their next? or skip_to() returns false.
9
- #
10
- # Construct a +ReqOptScorer+.
11
- # req_scorer:: The required scorer. This must match.
12
- # opt_scorer:: The optional scorer. This is used for scoring only.
13
- def initialize(req_scorer, opt_scorer)
14
- super(nil) # No similarity used.
15
- @req_scorer = req_scorer
16
- @opt_scorer = opt_scorer
17
-
18
- @first_time_opt_scorer = true
19
- end
20
-
21
-
22
- def next?
23
- return @req_scorer.next?
24
- end
25
-
26
- def skip_to(target)
27
- return @req_scorer.skip_to(target)
28
- end
29
-
30
- def doc()
31
- return @req_scorer.doc()
32
- end
33
-
34
- # Returns the score of the current document matching the query.
35
- # Initially invalid, until #next? is called the first time.
36
- #
37
- # returns:: The score of the required scorer, eventually increased by the
38
- # score of the optional scorer when it also matches the current
39
- # document.
40
- def score()
41
- cur_doc = @req_scorer.doc
42
- req_score = @req_scorer.score
43
- if @first_time_opt_scorer
44
- @first_time_opt_scorer = false
45
- if not @opt_scorer.skip_to(cur_doc)
46
- @opt_scorer = nil
47
- return req_score
48
- end
49
- elsif @opt_scorer.nil?
50
- return req_score
51
- elsif @opt_scorer.doc < cur_doc and not @opt_scorer.skip_to(cur_doc)
52
- @opt_scorer = nil
53
- return req_score
54
- end
55
- # assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc)
56
- return (@opt_scorer.doc == cur_doc) ? req_score + @opt_scorer.score() : req_score
57
- end
58
-
59
- # Explain the score of a document.
60
- # @todo Also show the total score.
61
- # See BooleanScorer.explain() on how to do this.
62
- def explain(doc)
63
- e = Explanation.new()
64
- e.description = "required, optional"
65
- e.details << @req_scorer.explain(doc)
66
- e.details << @opt_scorer.explain(doc)
67
- return e
68
- end
69
- end
70
- end
@@ -1,38 +0,0 @@
1
- module Ferret::Search
2
- # Expert: Returned by low-level search implementations.
3
- # See TopDocs
4
- class ScoreDoc
5
- include Comparable
6
- # Expert: The score of this document for the query.
7
- attr_accessor :score
8
-
9
- # Expert: A hit document's number.
10
- attr_accessor :doc
11
-
12
- # Expert: Constructs a ScoreDoc.
13
- def initialize(doc, score)
14
- @doc = doc
15
- @score = score
16
- end
17
-
18
- # returns a hash value for storage in a Hash
19
- def hash()
20
- return 100 * doc * score
21
- end
22
-
23
- # score_docA < score_docB if score_docA.score < score_docB.score or
24
- # score_docA.doc > score_docB.doc
25
- def <=>(other)
26
- result = @score.<=>(other.score)
27
- if (result == 0)
28
- return other.doc.<=>(@doc)
29
- else
30
- return result
31
- end
32
- end
33
-
34
- def to_s
35
- "#{@doc} -> %0.2f" % @score
36
- end
37
- end
38
- end
@@ -1,114 +0,0 @@
1
- module Ferret::Search
2
- # Expert: Compares two ScoreDoc objects for sorting.
3
- class ScoreDocComparator
4
-
5
- # Special comparator for sorting hits according to computed relevance (score).
6
- RELEVANCE = ScoreDocComparator.new()
7
- class <<RELEVANCE
8
- def compare(i, j)
9
- return j.score <=> i.score
10
- end
11
- def sort_value(i)
12
- return i.score
13
- end
14
- def sort_type()
15
- return SortField::SortType::SCORE
16
- end
17
- end
18
-
19
-
20
- # Special comparator for sorting hits according to index order (number).
21
- INDEX_ORDER = ScoreDocComparator.new()
22
- class <<INDEX_ORDER
23
- def compare(i, j)
24
- return i.doc <=> j.doc
25
- end
26
- def sort_value(i)
27
- return i.doc
28
- end
29
- def sort_type()
30
- return SortField::SortType::DOC
31
- end
32
- end
33
-
34
-
35
- # Compares two ScoreDoc objects and returns a result indicating their
36
- # sort order.
37
- # i:: First ScoreDoc
38
- # j:: Second ScoreDoc
39
- # returns:: +-1+ if +i+ should come before +j+
40
- # +1+ if +i+ should come after +j+
41
- # +0+ if they are equal
42
- def compare(i, j)
43
- return NotImplementedError
44
- end
45
-
46
-
47
- # Returns the value used to sort the given document. The object returned
48
- # must implement the java.io.Serializable interface. This is used by
49
- # multisearchers to determine how to collate results from their searchers.
50
- #
51
- # See FieldDoc
52
- # i:: Document
53
- # returns:: Serializable object
54
- def sort_value(i)
55
- return NotImplementedError
56
- end
57
-
58
-
59
- # Returns the type of sort. Should return +SortField.SCORE+,
60
- # +SortField.DOC+, +SortField.STRING+, +SortField.INTEGER+,
61
- # +SortField.FLOAT+ or +SortField.CUSTOM+. It is not valid to return
62
- # +SortField.AUTO+.
63
- # This is used by multisearchers to determine how to collate results from
64
- # their searchers. returns:: One of the constants in SortField.
65
- # See SortField
66
- def sort_type()
67
- return NotImplementedError
68
- end
69
- end
70
-
71
- class SimpleFieldComparator < ScoreDocComparator
72
- def initialize(index, sort_type)
73
- @index = index
74
- @sort_type = sort_type
75
- end
76
-
77
- def compare(i, j)
78
- return @index[i.doc] <=> @index[j.doc]
79
- end
80
- def sort_value(i)
81
- return @index[i.doc]
82
- end
83
- def sort_type()
84
- return @sort_type
85
- end
86
- end
87
-
88
- class SpecialFieldComparator < SimpleFieldComparator
89
- def initialize(index, sort_type, comparator)
90
- super(index, sort_type)
91
- @comparator = comparator
92
- end
93
- def compare(i, j)
94
- return @comparator.call(@index[i.doc], @index[j.doc])
95
- end
96
- end
97
-
98
- class StringFieldComparator < ScoreDocComparator
99
- def initialize(index)
100
- @str_index = index.str_index
101
- @str_map = index.str_map
102
- end
103
-
104
- def compare(i, j)
105
- return @str_index[i.doc] <=> @str_index[j.doc]
106
- end
107
- def sort_value(i)
108
- return @str_map[@str_index[i.doc]]
109
- end
110
- def sort_type()
111
- return SortField::SortType::STRING
112
- end
113
- end
114
- end