ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,25 +0,0 @@
1
- module Ferret::Search::Spans
2
- # Base class for span-based queries.
3
- class SpanQuery < Ferret::Search::Query
4
- # Expert: Returns the matches for this query in an index. Used internally
5
- # to search for spans.
6
- def spans(reader)
7
- raise NotImplementedError
8
- end
9
-
10
- # Returns the name of the field matched by this query.
11
- def field()
12
- raise NotImplementedError
13
- end
14
-
15
- # Returns a collection of all terms matched by this query.
16
- def terms()
17
- raise NotImplementedError
18
- end
19
-
20
- def create_weight(searcher)
21
- return SpanWeight.new(self, searcher)
22
- end
23
- end
24
- end
25
-
@@ -1,74 +0,0 @@
1
- module Ferret::Search::Spans
2
- class SpanScorer < Ferret::Search::Scorer
3
- include Ferret::Search
4
-
5
- def initialize(spans, weight, similarity, norms)
6
- @first_time = true
7
- @more = true
8
-
9
- super(similarity)
10
- @spans = spans
11
- @norms = norms
12
- @weight = weight
13
- @value = weight.value()
14
- @freq = 0.0
15
- end
16
-
17
- def next?
18
- if (@first_time)
19
- @more = @spans.next?
20
- @first_time = false
21
- end
22
-
23
- return false if not @more
24
-
25
- @freq = 0.0
26
- @doc = @spans.doc
27
-
28
- while (@more and @doc == @spans.doc)
29
- match_length = @spans.finish - @spans.start
30
- @freq += similarity().sloppy_freq(match_length)
31
- @more = @spans.next?
32
- end
33
-
34
- return (@more or @freq != 0.0)
35
- end
36
-
37
- def doc() return @doc end
38
-
39
- def score()
40
- raw = similarity().tf(@freq) * @value # raw score
41
- # normalize
42
- return raw * Similarity.decode_norm(@norms[@doc])
43
- end
44
-
45
- def skip_to(target)
46
- @more = @spans.skip_to(target)
47
-
48
- return false if not @more
49
-
50
- @freq = 0.0
51
- @doc = @spans.doc()
52
-
53
- while (@more and @spans.doc() == target)
54
- @freq += similarity().sloppy_freq(@spans.finish - @spans.start)
55
- @more = @spans.next?
56
- end
57
-
58
- return (@more or @freq != 0.0)
59
- end
60
-
61
- def explain(doc)
62
- tf_explanation = Explanation.new()
63
-
64
- skip_to(doc)
65
-
66
- phrase_freq = ((doc() == doc) ? @freq : 0.0)
67
- tf_explanation.value = similarity().tf(phrase_freq)
68
- tf_explanation.description = "tf(phrase_freq=#{phrase_freq})"
69
-
70
- return tf_explanation
71
- end
72
-
73
- end
74
- end
@@ -1,105 +0,0 @@
1
- module Ferret::Search::Spans
2
- # Matches spans containing a term.
3
- class SpanTermQuery < SpanQuery
4
- # Construct a SpanTermQuery matching the named term's spans.
5
- def initialize(term)
6
- super()
7
- @term = term
8
- end
9
-
10
- # Return the term whose spans are matched.
11
- def term() @term end
12
-
13
- def field() @term.field() end
14
-
15
- def terms() [@term] end
16
-
17
- def to_s(field = nil)
18
- if @term.field == field
19
- return @term.text
20
- else
21
- return @term.to_s
22
- end
23
- end
24
-
25
- # Returns true iff +o+ is equal to this.
26
- def eql?(o)
27
- return (o.instance_of?(SpanTermQuery) and boost() == o.boost and @term == o.term)
28
- end
29
- alias :== :eql?
30
-
31
- # Returns a hash code value for this object.
32
- def hash()
33
- return boost().hash ^ @term.hash
34
- end
35
-
36
- def spans(reader)
37
- return SpanTermEnum.new(self, reader)
38
- end
39
-
40
- class SpanTermEnum < SpansEnum
41
- def initialize(query, reader)
42
- @query = query
43
- @positions = reader.term_positions_for(@query.term)
44
- @position = -1
45
- @doc = -1
46
- @count = 0
47
- @freq = 0
48
- end
49
-
50
- def next?
51
- if (@count == @freq)
52
- if not @positions.next?
53
- @doc = Ferret::Search::Scorer::MAX_DOCS
54
- return false
55
- end
56
- @doc = @positions.doc()
57
- @freq = @positions.freq()
58
- @count = 0
59
- end
60
- @position = @positions.next_position()
61
- @count += 1
62
- return true
63
- end
64
-
65
- def skip_to(target)
66
- # are we already at the correct position?
67
- if (@doc >= target)
68
- return true
69
- end
70
-
71
- if not @positions.skip_to(target)
72
- @doc = Ferret::Search::Scorer::MAX_DOCS
73
- return false
74
- end
75
-
76
- @doc = @positions.doc()
77
- @freq = @positions.freq()
78
- @count = 0
79
-
80
- @position = @positions.next_position()
81
- @count += 1
82
-
83
- return true
84
- end
85
-
86
- def doc() @doc end
87
- def start() @position end
88
- def finish() @position + 1 end
89
-
90
- def to_s()
91
- buffer = "spans(#{@query})@"
92
- if @doc < 0
93
- buffer << "START"
94
- else
95
- if @doc == Ferret::Search::Scorer::MAX_DOCS
96
- buffer << "END"
97
- else
98
- buffer << "#{@doc}-#{@position}"
99
- end
100
- end
101
- return buffer
102
- end
103
- end
104
- end
105
- end
@@ -1,84 +0,0 @@
1
- module Ferret::Search::Spans
2
- class SpanWeight < Ferret::Search::Weight
3
- include Ferret::Search
4
- def initialize(query, searcher)
5
- @similarity = query.similarity(searcher)
6
- @query = query
7
- @terms = query.terms()
8
-
9
- @idf = @similarity.idf_phrase(@terms, searcher)
10
- end
11
-
12
- attr_reader :query, :value
13
-
14
- def sum_of_squared_weights()
15
- @query_weight = @idf * @query.boost() # compute query weight
16
- return @query_weight * @query_weight # square it
17
- end
18
-
19
- def normalize(query_norm)
20
- @query_norm = query_norm
21
- @query_weight *= query_norm # normalize query weight
22
- @value = @query_weight * @idf # idf for document
23
- end
24
-
25
- def scorer(reader)
26
- return SpanScorer.new(@query.spans(reader), self,
27
- @similarity,
28
- reader.get_norms(@query.field))
29
- end
30
-
31
- def explain(reader, doc)
32
- result = Explanation.new()
33
- result.description = "weight(#{@query} in #{doc}), product of:"
34
- field = @query.field
35
-
36
- doc_freqs = @terms.map {|t| "#{t.text}=#{reader.doc_freq(t)}"}.join(' ')
37
-
38
- idf_expl = Explanation.new(@idf, "idf(#{field}: #{doc_freqs})")
39
-
40
- # explain query weight
41
- query_expl = Explanation.new()
42
- query_expl.description = "query_weight(#{@query}), product of:"
43
-
44
- boost_expl = Explanation.new(@query.boost, "boost")
45
- query_expl << boost_expl if (@query.boost != 1.0)
46
- query_expl << idf_expl
47
-
48
- query_norm_expl = Explanation.new(@query_norm,"query_norm")
49
- query_expl << query_norm_expl
50
-
51
- query_expl.value = boost_expl.value * idf_expl.value * query_norm_expl.value
52
-
53
- result << query_expl
54
-
55
- # explain field weight
56
- field_expl = Explanation.new()
57
- field_expl.description = "field_weight(#{field}:#{@query.to_s(field)}"+
58
- " in #{doc}), product of:"
59
-
60
- tf_expl = scorer(reader).explain(doc)
61
- field_expl << tf_expl
62
- field_expl << idf_expl
63
-
64
- field_norm_expl = Explanation.new()
65
- field_norms = reader.get_norms(field)
66
- field_norm = (field_norms ? Similarity.decode_norm(field_norms[doc]) : 0.0)
67
- field_norm_expl.value = field_norm
68
- field_norm_expl.description = "field_norm(field=#{field}, doc=#{doc})"
69
- field_expl << field_norm_expl
70
-
71
- field_expl.value = tf_expl.value * idf_expl.value * field_norm_expl.value
72
-
73
- result << field_expl
74
-
75
- # combine them
76
- result.value = query_expl.value * field_expl.value
77
-
78
- if (query_expl.value == 1.0)
79
- return field_expl
80
- end
81
- return result
82
- end
83
- end
84
- end
@@ -1,44 +0,0 @@
1
- module Ferret::Search::Spans
2
- # Expert: an enumeration of span matches. Used to implement span searching.
3
- # Each span represents a range of term positions within a document. Matches
4
- # are enumerated in order, by increasing document number, within that by
5
- # increasing start position and finally by increasing finish position.
6
- class SpansEnum
7
- # Move to the next match, returning true iff any such exists.
8
- def next?()
9
- raise NotImplementedError
10
- end
11
-
12
- # Skips to the first match beyond the current, whose document number is
13
- # greater than or equal to _target_. Returns true iff there is such a
14
- # match. Behaves as if written:
15
- #
16
- # def skip_to(target)
17
- # begin
18
- # return false if (!next?)
19
- # end while (target > doc)
20
- # return true
21
- # end
22
- #
23
- # Most implementations are considerably more efficient than that.
24
- def skip_to(target)
25
- raise NotImplementedError
26
- end
27
-
28
- # Returns the document number of the current match. Initially invalid.
29
- def doc()
30
- raise NotImplementedError
31
- end
32
-
33
-
34
- # Returns the start position of the current match. Initially invalid.
35
- def start()
36
- raise NotImplementedError
37
- end
38
-
39
- # Returns the finish position of the current match. Initially invalid.
40
- def finish()
41
- raise NotImplementedError
42
- end
43
- end
44
- end
@@ -1,128 +0,0 @@
1
- module Ferret::Search
2
- # A Query that matches documents containing a @term.
3
- # This may be combined with other terms with a BooleanQuery.
4
- class TermQuery < Query
5
-
6
- attr_reader :term
7
-
8
- class TermWeight < Weight
9
- attr_reader :value, :query
10
-
11
- def initialize(query, searcher)
12
- @similarity = query.similarity(searcher)
13
- @idf = @similarity.idf(searcher.doc_freq(query.term),
14
- searcher.max_doc) # compute idf
15
- @query = query
16
- @value = 0
17
- end
18
-
19
- def to_s() return "TermWeight(#{@value})"; end
20
-
21
- def sum_of_squared_weights()
22
- @query_weight = @idf * @query.boost() # compute query weight
23
- return @query_weight * @query_weight # square it
24
- end
25
-
26
- def normalize(query_norm)
27
- @query_norm = query_norm
28
- @query_weight *= query_norm # normalize query weight
29
- @value = @query_weight * @idf # idf for document
30
- end
31
-
32
- def scorer(reader)
33
- term_docs = reader.term_docs_for(@query.term)
34
-
35
- return nil if term_docs.nil?
36
-
37
- return TermScorer.new(self, term_docs, @similarity,
38
- reader.get_norms(@query.term.field))
39
- end
40
-
41
- def explain(reader, doc)
42
- explanation = Explanation.new()
43
- explanation.description = "weight(#{@query} in #{doc}), product of:"
44
-
45
- idf_expl = Explanation.new(@idf, "idf(doc_freq=#{reader.doc_freq(@query.term)})")
46
-
47
- # explain query weight
48
- query_expl = Explanation.new(nil, "query_weight(#{@query}), product of:")
49
-
50
- boost_expl = Explanation.new(@query.boost(), "boost")
51
- if (@query.boost() != 1.0)
52
- query_expl << boost_expl
53
- end
54
- query_expl << idf_expl
55
-
56
- query_norm_expl = Explanation.new(@query_norm||0.0,"query_norm")
57
- query_expl << query_norm_expl
58
-
59
- query_expl.value = boost_expl.value * idf_expl.value * query_norm_expl.value
60
-
61
- explanation << query_expl
62
-
63
- # explain field weight
64
- field_name = @query.term.field
65
- field_expl = Explanation.new()
66
- field_expl.description = "field_weight(#{@query.term} in #{doc}), product of:"
67
-
68
- tf_expl = scorer(reader).explain(doc)
69
- field_expl << (tf_expl)
70
- field_expl << (idf_expl)
71
-
72
- field_norms = reader.get_norms(field_name)
73
- field_norm = field_norms.nil? ? 0.0 : Similarity.decode_norm(field_norms[doc])
74
- field_norm_expl = Explanation.new(field_norm,
75
- "field_norm(field=#{field_name}, doc=#{doc})")
76
- field_expl << field_norm_expl
77
-
78
- field_expl.value = tf_expl.value * idf_expl.value * field_norm_expl.value
79
- explanation << field_expl
80
-
81
- # combine them
82
- explanation.value = (query_expl.value * field_expl.value)
83
-
84
- if (query_expl.value == 1.0)
85
- return field_expl
86
- end
87
-
88
- return explanation
89
- end
90
- end
91
-
92
- # Constructs a query for the @query.term +t+.
93
- def initialize(t)
94
- super()
95
- @term = t
96
- end
97
-
98
- def create_weight(searcher)
99
- return TermWeight.new(self, searcher)
100
- end
101
-
102
- def extract_terms(terms)
103
- terms << @term
104
- end
105
-
106
- # Prints a user-readable version of this query.
107
- def to_s(field = nil)
108
- buffer = ""
109
- buffer << "#{@term.field}:" if field != @term.field
110
- buffer << "#{@term.text}"
111
- buffer << "^#{@boost}" if @boost != 1.0
112
- return buffer
113
- end
114
-
115
- # Returns true iff +o+ is equal to this.
116
- def eql?(other)
117
- return false if not other.instance_of?(TermQuery)
118
- return (@boost == other.boost and @term == other.term)
119
- end
120
- alias :== :eql?
121
-
122
- # Returns a hash code value for this object.
123
- def hash()
124
- return @boost.hash ^ @term.hash
125
- end
126
-
127
- end
128
- end