ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,79 +0,0 @@
1
module Ferret::Search::Spans
  # Matches spans near the beginning of a field.
  #
  # Wraps another SpanQuery (+match+) and only lets through those of its
  # spans whose finish position is less than or equal to +finish+.
  class SpanFirstQuery < SpanQuery
    # Construct a SpanFirstQuery matching spans in +match+ whose finish
    # position is less than or equal to +finish+.
    #
    # match::  the SpanQuery whose spans are filtered
    # finish:: the maximum finish position permitted in a match
    def initialize(match, finish)
      super()
      @match = match
      @finish = finish
    end

    # Return the SpanQuery whose matches are filtered.
    attr_reader :match

    # Return the maximum finish position permitted in a match.
    attr_reader :finish

    # The field searched is the field of the wrapped query.
    def field() @match.field() end

    # The terms of this query are the terms of the wrapped query.
    def terms() @match.terms() end

    # Render the query as "span_first(<match>, <finish>)". +field+ is handed
    # on to the wrapped query's own to_s.
    def to_s(field = nil)
      return "span_first(#{@match.to_s(field)}, #{finish})"
    end

    # Return an enumeration over the spans of this query in +reader+.
    def spans(reader)
      SpanFirstEnum.new(self, reader)
    end

    # Enumerates the spans of the wrapped query, skipping every span that
    # finishes after the owning query's +finish+ limit.
    class SpanFirstEnum < SpansEnum
      def initialize(query, reader)
        super()
        @query = query
        @spans = @query.match.spans(reader)
      end

      # Advance to the next span that finishes within the limit. Returns
      # false once the underlying enumeration is exhausted.
      def next?()
        while @spans.next?() # scan to next match
          return true if finish() <= @query.finish
        end
        return false
      end

      # Skip the underlying enumeration to +target+, then make sure the
      # current span satisfies the finish limit (scanning forward if not).
      def skip_to(target)
        return false unless @spans.skip_to(target)

        # the span we landed on is already a match
        return true if @spans.finish <= @query.finish

        return next?() # scan to next match
      end

      # The current position is simply that of the underlying enumeration.
      def doc() @spans.doc() end
      def start() @spans.start() end
      def finish() @spans.finish() end

      def to_s() "spans(#{@query})" end
    end

    # Rewrite the wrapped query against +reader+. Returns +self+ when the
    # wrapped query did not change, otherwise a clone of this query that
    # wraps the rewritten query.
    def rewrite(reader)
      rewritten = @match.rewrite(reader)
      if rewritten != @match
        copy = self.clone()
        # Needs the protected writer below; without a writer this
        # assignment raised NoMethodError.
        copy.match = rewritten
        return copy # the wrapped query rewrote
      end
      return self # nothing rewrote
    end

    # Writer used only by #rewrite to patch a clone. Protected so that other
    # code cannot change the wrapped query behind the query's back.
    attr_writer :match
    protected :match=
  end
end
@@ -1,108 +0,0 @@
1
module Ferret::Search::Spans
  # Matches spans which are near one another. One can specify _slop_, the
  # maximum number of intervening unmatched positions, as well as whether
  # matches are required to be in-order.
  class SpanNearQuery < SpanQuery

    # Construct a SpanNearQuery. Matches spans matching a span from each
    # clause, with up to +slop+ total unmatched positions between them. When
    # +in_order+ is true, the spans from each clause must be ordered as in
    # +clauses+.
    #
    # Raises ArgumentError unless every clause reports the same field as the
    # first clause.
    def initialize(clauses, slop, in_order)
      super()
      # Keep a private array so the caller's array is never shared.
      @clauses = Array.new(clauses.length)
      @field = nil
      clauses.each_with_index do |clause, i|
        if i == 0 # the first clause decides the field
          @field = clause.field()
        elsif clause.field() != @field
          raise ArgumentError, "Clauses must have same field."
        end
        @clauses[i] = clause
      end

      @slop = slop
      @in_order = in_order
    end

    # Invoked on the copy by #clone and #dup. Object#clone is shallow, so
    # without this the copy would share its clause array with the original
    # and #rewrite would modify the original query in place.
    def initialize_copy(source)
      super
      @clauses = @clauses.dup
    end

    # Return the clauses whose spans are matched.
    def clauses() @clauses end

    # Return the maximum number of intervening unmatched positions permitted.
    def slop() @slop end

    # Return true if matches are required to be in-order.
    def in_order?() @in_order end

    # The field shared by all clauses (nil when there are no clauses).
    attr_reader :field

    # Return the terms of all clauses, concatenated in clause order.
    def terms()
      collected = []
      @clauses.each { |clause| collected.concat(clause.terms) }
      return collected
    end

    # Render the query as "span_near([<clauses>], <slop>, <in_order>)".
    # +field+ is handed on to each clause's own to_s.
    def to_s(field = nil)
      buffer = "span_near(["
      buffer << @clauses.map {|c| c.to_s(field)}.join(", ")
      buffer << "], #{@slop}, #{@in_order})"
      return buffer
    end

    # Return an enumeration over the spans of this query in +reader+.
    def spans(reader)
      case @clauses.size()
      when 0 # optimize 0-clause case
        return SpanOrQuery.new(@clauses).spans(reader)
      when 1 # optimize 1-clause case
        return @clauses[0].spans(reader)
      end

      return NearSpansEnum.new(self, reader)
    end

    # Rewrite every clause against +reader+. Returns +self+ when no clause
    # changed; otherwise returns a clone carrying the rewritten clauses and
    # leaves this query untouched.
    def rewrite(reader)
      copy = nil
      @clauses.each_with_index do |clause, i|
        rewritten = clause.rewrite(reader)
        if rewritten != clause # clause rewrote: must clone
          copy ||= self.clone()
          copy.clauses[i] = rewritten
        end
      end
      return copy || self
    end

    # Returns true iff +o+ is equal to this.
    def eql?(o)
      return false if (o == nil or self.class() != o.class())

      return false if (@in_order != o.in_order?)
      return false if (@slop != o.slop)
      return false if (@clauses != o.clauses)
      return false if (@field != o.field)

      return true
    end
    alias :== :eql?

    # Hash code combining the clauses, slop, ordering flag and field, so
    # that it is consistent with #eql?.
    def hash()
      result = @clauses.hash()
      result += @slop * 29
      result += (@in_order ? 1 : 0)
      result ^= @field.hash()
      return result
    end
  end
end
@@ -1,130 +0,0 @@
1
module Ferret::Search::Spans
  # Removes matches which overlap with another SpanQuery.
  class SpanNotQuery < SpanQuery
    # Construct a SpanNotQuery matching spans from +incl+ which
    # have no overlap with spans from +excl+.
    #
    # Raises ArgumentError if the two queries report different fields.
    def initialize(incl, excl)
      super()
      @incl = incl
      @excl = excl

      if incl.field != excl.field
        raise ArgumentError, "Clauses must have same field."
      end
    end

    # Return the SpanQuery whose matches are filtered.
    attr_reader :incl

    # Return the SpanQuery whose matches must not overlap those returned.
    attr_reader :excl

    # The field searched is the field of the included query.
    def field() @incl.field() end

    # Only the included query contributes terms.
    def terms() @incl.terms() end

    # Render the query as "span_not(<incl>, <excl>)". +field+ is handed on
    # to both sub-queries' own to_s.
    def to_s(field = nil)
      return "span_not(#{incl.to_s(field)}, #{excl.to_s(field)})"
    end

    # Return an enumeration over the spans of this query in +reader+.
    def spans(reader)
      return SpanNotEnum.new(self, reader)
    end

    # Walks the included spans and the excluded spans side by side, yielding
    # only those included spans that do not intersect an excluded span in
    # the same document.
    class SpanNotEnum < SpansEnum
      def initialize(query, reader)
        @query = query
        @incl_spans = @query.incl.spans(reader)
        @more_incl = true
        @excl_spans = @query.excl.spans(reader)
        @more_excl = @excl_spans.next? # excl_spans needs to be preset
      end

      # Advance to the next included span that has no overlap with an
      # excluded span. Returns false when the included spans run out.
      def next?()
        if (@more_incl) # move to next incl
          @more_incl = @incl_spans.next?()
        end

        while (@more_incl and @more_excl)
          if (@incl_spans.doc > @excl_spans.doc) # skip excl
            @more_excl = @excl_spans.skip_to(@incl_spans.doc)
          end

          while (@more_excl and # while excl is before
                 @incl_spans.doc == @excl_spans.doc and
                 @excl_spans.finish <= @incl_spans.start)
            @more_excl = @excl_spans.next? # increment excl
          end

          if (not @more_excl or # if no intersection
              @incl_spans.doc != @excl_spans.doc or
              @incl_spans.finish <= @excl_spans.start)
            break # we found a match
          end

          @more_incl = @incl_spans.next? # intersected: keep scanning
        end
        return @more_incl
      end

      # Skip the included spans to +target+, bring the excluded spans up to
      # the same position, and return true if the span landed on is a match;
      # otherwise continue with #next?.
      def skip_to(target)
        if @more_incl # skip incl
          @more_incl = @incl_spans.skip_to(target)
        end

        if not @more_incl
          return false
        end

        if (@more_excl and @incl_spans.doc > @excl_spans.doc) # skip excl
          @more_excl = @excl_spans.skip_to(@incl_spans.doc)
        end

        while (@more_excl and # while excl is before
               @incl_spans.doc == @excl_spans.doc and
               @excl_spans.finish <= @incl_spans.start)
          @more_excl = @excl_spans.next? # increment excl
        end

        if (not @more_excl or # if no intersection
            @incl_spans.doc != @excl_spans.doc or
            @incl_spans.finish <= @excl_spans.start)
          return true # we found a match
        end

        return next?() # scan to next match
      end

      # The current position is that of the included spans.
      def doc() @incl_spans.doc end
      def start() @incl_spans.start end
      def finish() @incl_spans.finish end

      def to_s()
        return "spans(#{@query})"
      end
    end

    # Rewrite both sub-queries against +reader+. Returns +self+ when neither
    # changed, otherwise a clone carrying the rewritten sub-queries.
    def rewrite(reader)
      copy = nil

      rewritten_incl = @incl.rewrite(reader)
      if (rewritten_incl != @incl)
        copy = self.clone()
        # Needs the protected writer below; without a writer this
        # assignment raised NoMethodError.
        copy.incl = rewritten_incl
      end

      rewritten_excl = @excl.rewrite(reader)
      if (rewritten_excl != @excl)
        copy ||= self.clone()
        copy.excl = rewritten_excl
      end

      return copy || self # clone if some clause rewrote, else self
    end

    # Writers used only by #rewrite to patch a clone. Protected so that
    # other code cannot swap the sub-queries of an existing query.
    attr_writer :incl, :excl
    protected :incl=, :excl=

  end
end
@@ -1,176 +0,0 @@
1
module Ferret::Search::Spans
  # Matches the union of its clauses.
  class SpanOrQuery < SpanQuery

    # Construct a SpanOrQuery merging the provided clauses.
    #
    # Raises ArgumentError unless every clause reports the same field as the
    # first clause.
    def initialize(clauses)
      super()

      # Keep a private array so the caller's array is never shared.
      @clauses = Array.new(clauses.length)
      @field = nil
      clauses.each_with_index do |clause, i|
        if i == 0 # the first clause decides the field
          @field = clause.field()
        elsif clause.field() != @field
          raise ArgumentError, "Clauses must have same field."
        end
        @clauses[i] = clause
      end
    end

    # Invoked on the copy by #clone and #dup. Object#clone is shallow, so
    # without this the copy would share its clause array with the original
    # and #rewrite would modify the original query in place.
    def initialize_copy(source)
      super
      @clauses = @clauses.dup
    end

    # Return the clauses whose spans are matched.
    def clauses() @clauses end

    # The field shared by all clauses (nil when there are no clauses).
    attr_reader :field

    # Return the terms of all clauses, concatenated in clause order.
    def terms()
      collected = []
      @clauses.each { |clause| collected.concat(clause.terms) }
      return collected
    end

    # Rewrite every clause against +reader+. Returns +self+ when no clause
    # changed; otherwise returns a clone carrying the rewritten clauses and
    # leaves this query untouched.
    def rewrite(reader)
      copy = nil
      @clauses.each_with_index do |clause, i|
        rewritten = clause.rewrite(reader)
        if rewritten != clause # clause rewrote: must clone
          copy ||= self.clone()
          copy.clauses[i] = rewritten
        end
      end
      return copy || self
    end

    # Render the query as "spanOr([<clauses>])". +field+ is handed on to
    # each clause's own to_s.
    def to_s(field = nil)
      buffer = "spanOr(["
      # Pass the argument itself: writing field() here would call the
      # reader method and silently ignore what the caller asked for.
      buffer << @clauses.map {|c| c.to_s(field) }.join(", ")
      buffer << "])"
      return buffer
    end

    # Returns true iff +o+ is a SpanOrQuery with equal clauses and field.
    def eql?(o)
      return false if (o == nil or self.class() != o.class())

      return false if (@clauses != o.clauses)
      return false if (@field != o.field)

      return true
    end
    alias :== :eql?

    # Hash code combining clauses and field, consistent with #eql?.
    def hash()
      return @clauses.hash ^ @field.hash
    end

    # Priority queue ordering span enumerations by document, then start
    # position, then finish position.
    class SpanQueue < Ferret::Utils::PriorityQueue
      def less_than(o1, o2)
        return o1.doc < o2.doc if o1.doc != o2.doc
        return o1.start < o2.start if o1.start != o2.start
        return o1.finish < o2.finish
      end
    end

    # Return an enumeration over the spans of this query in +reader+.
    def spans(reader)
      if (@clauses.size == 1) # optimize 1-clause case
        return @clauses[0].spans(reader)
      end

      return SpanOrEnum.new(self, reader)
    end

    # Merges the span enumerations of all clauses through a SpanQueue so
    # that the current span is always the one at the top of the queue.
    class SpanOrEnum < SpansEnum
      def initialize(query, reader)
        @query = query
        @queue = SpanQueue.new(query.clauses.size)
        @all = query.clauses.map {|c| c.spans(reader)}
        @first_time = true
      end

      # Advance to the next span in merged order. The first call positions
      # every clause on its first span and fills the queue; clauses with no
      # spans at all are dropped.
      def next?
        if (@first_time) # first time -- initialize
          @all.delete_if do |spans|
            if (spans.next?) # move to first entry
              @queue.push(spans) # build queue
              false
            else
              true
            end
          end
          @first_time = false
          return @queue.size() != 0
        end

        return false if @queue.size == 0 # all done

        if top().next? # move to next
          @queue.adjust_top()
          return true
        end

        @all.delete(@queue.pop()) # exhausted a clause

        return @queue.size() != 0
      end

      # The enumeration currently at the head of the queue.
      def top() return @queue.top() end

      # Skip to the first span in a document at or after +target+. On first
      # use every clause is skipped and queued; afterwards only the clauses
      # still before +target+ are moved. Exhausted clauses are dropped.
      def skip_to(target)
        if (@first_time)
          @all.delete_if do |spans|
            if (spans.skip_to(target)) # skip each spans in all
              @queue.push(spans) # build queue
              false
            else
              true
            end
          end
          @first_time = false
        else
          while (@queue.size != 0 and top().doc < target)
            if (top().skip_to(target))
              @queue.adjust_top()
            else
              @all.delete(@queue.pop())
            end
          end
        end

        return @queue.size() != 0
      end

      # The current position is that of the enumeration heading the queue.
      def doc() top().doc() end
      def start() top().start() end
      def finish() top().finish() end

      # Describe the enumeration and where it stands: "START" before the
      # first advance, "doc:start-finish" while positioned, "END" when done.
      def to_s()
        buffer = "spans(#{@query})@"
        if @first_time
          buffer << "START"
        else
          buffer << (@queue.size>0 ? ("#{doc}:#{start()}-#{finish}") : "END")
        end
        return buffer
      end
    end

  end
end