jk-ferret 0.11.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
@@ -0,0 +1,27 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+ class SortFieldTest < Test::Unit::TestCase
4
+ include Ferret::Search
5
+
6
+ def test_field_score()
7
+ fs = SortField::SCORE
8
+ assert_equal(:score, fs.type)
9
+ assert_nil(fs.name)
10
+ assert(!fs.reverse?, "SCORE_ID should not be reverse")
11
+ assert_nil(fs.comparator)
12
+ end
13
+
14
+ def test_field_doc()
15
+ fs = SortField::DOC_ID
16
+ assert_equal(:doc_id, fs.type)
17
+ assert_nil(fs.name)
18
+ assert(!fs.reverse?, "DOC_ID should be reverse")
19
+ assert_nil(fs.comparator)
20
+ end
21
+
22
+ def test_error_raised()
23
+ assert_raise(ArgumentError) {
24
+ fs = SortField.new(nil, :type => :integer)
25
+ }
26
+ end
27
+ end
@@ -0,0 +1,190 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
+ class SpansBasicTest < Test::Unit::TestCase
5
+ include Ferret::Search
6
+ include Ferret::Store
7
+ include Ferret::Index
8
+ include Ferret::Search::Spans
9
+ include Ferret::Analysis
10
+
11
+ def setup()
12
+ @dir = RAMDirectory.new
13
+ iw = IndexWriter.new(:dir => @dir,
14
+ :analyzer => WhiteSpaceAnalyzer.new(),
15
+ :create => true)
16
+ [
17
+ "start finish one two three four five six seven",
18
+ "start one finish two three four five six seven",
19
+ "start one two finish three four five six seven flip",
20
+ "start one two three finish four five six seven",
21
+ "start one two three four finish five six seven",
22
+ "start one two three four five finish six seven",
23
+ "start one two three four five six finish seven eight",
24
+ "start one two three four five six seven finish eight nine",
25
+ "start one two three four five six finish seven eight",
26
+ "start one two three four five finish six seven",
27
+ "start one two three four finish five six seven",
28
+ "start one two three finish four five six seven",
29
+ "start one two finish three four five six seven flop",
30
+ "start one finish two three four five six seven",
31
+ "start finish one two three four five six seven",
32
+ "start start one two three four five six seven",
33
+ "finish start one two three four five six seven",
34
+ "finish one start two three four five six seven toot",
35
+ "finish one two start three four five six seven",
36
+ "finish one two three start four five six seven",
37
+ "finish one two three four start five six seven",
38
+ "finish one two three four five start six seven",
39
+ "finish one two three four five six start seven eight",
40
+ "finish one two three four five six seven start eight nine",
41
+ "finish one two three four five six start seven eight",
42
+ "finish one two three four five start six seven",
43
+ "finish one two three four start five six seven",
44
+ "finish one two three start four five six seven",
45
+ "finish one two start three four five six seven",
46
+ "finish one start two three four five six seven",
47
+ "finish start one two three four five six seven"
48
+ ].each { |line| iw << {:field => line} }
49
+
50
+ iw.close()
51
+
52
+ @searcher = Searcher.new(@dir)
53
+ end
54
+
55
+ def teardown()
56
+ @searcher.close
57
+ @dir.close
58
+ end
59
+
60
+ def number_split(i)
61
+ if (i < 10)
62
+ return "<#{i}>"
63
+ elsif (i < 100)
64
+ return "<#{((i/10)*10)}> <#{i%10}>"
65
+ else
66
+ return "<#{((i/100)*100)}> <#{(((i%100)/10)*10)}> <#{i%10}>"
67
+ end
68
+ end
69
+
70
+ def check_hits(query, expected, test_explain = false, top=nil)
71
+ top_docs = @searcher.search(query, {:limit => expected.length + 1})
72
+ assert_equal(expected.length, top_docs.hits.size)
73
+ assert_equal(top, top_docs.hits[0].doc) if top
74
+ assert_equal(expected.length, top_docs.total_hits)
75
+ top_docs.hits.each do |hit|
76
+ assert(expected.include?(hit.doc),
77
+ "#{hit.doc} was found unexpectedly")
78
+ if test_explain
79
+ assert(hit.score.approx_eql?(@searcher.explain(query, hit.doc).score),
80
+ "Scores(#{hit.score} != " +
81
+ "#{@searcher.explain(query, hit.doc).score})")
82
+ end
83
+ end
84
+ end
85
+
86
+ def test_span_term_query()
87
+ tq = SpanTermQuery.new(:field, "nine")
88
+ check_hits(tq, [7,23], true)
89
+ tq = SpanTermQuery.new(:field, "eight")
90
+ check_hits(tq, [6,7,8,22,23,24])
91
+ end
92
+
93
+ def test_span_multi_term_query()
94
+ tq = SpanMultiTermQuery.new(:field, ["eight", "nine"])
95
+ check_hits(tq, [6,7,8,22,23,24], true)
96
+ tq = SpanMultiTermQuery.new(:field, ["flip", "flop", "toot", "nine"])
97
+ check_hits(tq, [2,7,12,17,23])
98
+ end
99
+
100
+ def test_span_prefix_query()
101
+ tq = SpanPrefixQuery.new(:field, "fl")
102
+ check_hits(tq, [2, 12], true)
103
+ end
104
+
105
+ def test_span_near_query()
106
+ tq1 = SpanTermQuery.new(:field, "start")
107
+ tq2 = SpanTermQuery.new(:field, "finish")
108
+ q = SpanNearQuery.new(:clauses => [tq1, tq2], :in_order => true)
109
+ check_hits(q, [0,14], true)
110
+ q = SpanNearQuery.new()
111
+ q << tq1 << tq2
112
+ check_hits(q, [0,14,16,30], true)
113
+ q = SpanNearQuery.new(:clauses => [tq1, tq2],
114
+ :slop => 1, :in_order => true)
115
+ check_hits(q, [0,1,13,14])
116
+ q = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 1)
117
+ check_hits(q, [0,1,13,14,16,17,29,30])
118
+ q = SpanNearQuery.new(:clauses => [tq1, tq2],
119
+ :slop => 4, :in_order => true)
120
+ check_hits(q, [0,1,2,3,4,10,11,12,13,14])
121
+ q = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 4)
122
+ check_hits(q, [0,1,2,3,4,10,11,12,13,14,16,17,18,19,20,26,27,28,29,30])
123
+ q = SpanNearQuery.new(:clauses => [
124
+ SpanPrefixQuery.new(:field, 'se'),
125
+ SpanPrefixQuery.new(:field, 'fl')], :slop => 0)
126
+ check_hits(q, [2, 12], true)
127
+ end
128
+
129
+ def test_span_not_query()
130
+ tq1 = SpanTermQuery.new(:field, "start")
131
+ tq2 = SpanTermQuery.new(:field, "finish")
132
+ tq3 = SpanTermQuery.new(:field, "two")
133
+ tq4 = SpanTermQuery.new(:field, "five")
134
+ nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2],
135
+ :slop => 4, :in_order => true)
136
+ nearq2 = SpanNearQuery.new(:clauses => [tq3, tq4],
137
+ :slop => 4, :in_order => true)
138
+ q = SpanNotQuery.new(nearq1, nearq2)
139
+ check_hits(q, [0,1,13,14], true)
140
+ nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 4)
141
+ q = SpanNotQuery.new(nearq1, nearq2)
142
+ check_hits(q, [0,1,13,14,16,17,29,30])
143
+ nearq1 = SpanNearQuery.new(:clauses => [tq1, tq3],
144
+ :slop => 4, :in_order => true)
145
+ nearq2 = SpanNearQuery.new(:clauses => [tq2, tq4], :slop => 8)
146
+ q = SpanNotQuery.new(nearq1, nearq2)
147
+ check_hits(q, [2,3,4,5,6,7,8,9,10,11,12,15])
148
+ end
149
+
150
+ def test_span_first_query()
151
+ finish_first = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
152
+ tq = SpanTermQuery.new(:field, "finish")
153
+ q = SpanFirstQuery.new(tq, 1)
154
+ check_hits(q, finish_first, true)
155
+ q = SpanFirstQuery.new(tq, 5)
156
+ check_hits(q, [0,1,2,3,11,12,13,14]+finish_first, false)
157
+ end
158
+
159
+ def test_span_or_query_query()
160
+ tq1 = SpanTermQuery.new(:field, "start")
161
+ tq2 = SpanTermQuery.new(:field, "finish")
162
+ tq3 = SpanTermQuery.new(:field, "five")
163
+ nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 1,
164
+ :in_order => true)
165
+ nearq2 = SpanNearQuery.new(:clauses => [tq2, tq3], :slop => 0)
166
+ q = SpanOrQuery.new([nearq1, nearq2])
167
+ check_hits(q, [0,1,4,5,9,10,13,14], false)
168
+ nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 0)
169
+ nearq2 = SpanNearQuery.new(:clauses => [tq2, tq3], :slop => 1)
170
+ q = SpanOrQuery.new([nearq1, nearq2])
171
+ check_hits(q, [0,3,4,5,6,8,9,10,11,14,16,30], false)
172
+ end
173
+
174
+ def test_span_prefix_query_max_terms
175
+ @dir = RAMDirectory.new
176
+ iw = IndexWriter.new(:dir => @dir,
177
+ :analyzer => WhiteSpaceAnalyzer.new())
178
+ 2000.times { |i| iw << {:field => "prefix#{i} term#{i}"} }
179
+ iw.close()
180
+ @searcher = Searcher.new(@dir)
181
+
182
+ pq = SpanPrefixQuery.new(:field, "prefix")
183
+ tq = SpanTermQuery.new(:field, "term1500")
184
+ q = SpanNearQuery.new(:clauses => [pq, tq], :in_order => true)
185
+ check_hits(q, [], false)
186
+ pq = SpanPrefixQuery.new(:field, "prefix", 2000)
187
+ q = SpanNearQuery.new(:clauses => [pq, tq], :in_order => true)
188
+ check_hits(q, [1500], false)
189
+ end
190
+ end
@@ -0,0 +1,436 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+ module SearcherTests
4
+ include Ferret::Search
5
+
6
+ def test_term_query
7
+ tq = TermQuery.new(:field, "word2")
8
+ tq.boost = 100
9
+ check_hits(tq, [1,4,8])
10
+ #puts @searcher.explain(tq, 1)
11
+ #puts @searcher.explain(tq, 4)
12
+ #puts @searcher.explain(tq, 8)
13
+
14
+ tq = TermQuery.new(:field, "2342")
15
+ check_hits(tq, [])
16
+
17
+ tq = TermQuery.new(:field, "")
18
+ check_hits(tq, [])
19
+
20
+ tq = TermQuery.new(:field, "word1")
21
+ top_docs = @searcher.search(tq)
22
+ assert_equal(@searcher.max_doc, top_docs.total_hits)
23
+ assert_equal(10, top_docs.hits.size)
24
+ top_docs = @searcher.search(tq, {:limit => 20})
25
+ assert_equal(@searcher.max_doc, top_docs.hits.size)
26
+
27
+ assert_equal([Ferret::Term.new(:field, "word1")], tq.terms(@searcher))
28
+ end
29
+
30
+ def check_docs(query, options, expected=[])
31
+ top_docs = @searcher.search(query, options)
32
+ docs = top_docs.hits
33
+ assert_equal(expected.length, docs.length)
34
+ docs.length.times do |i|
35
+ assert_equal(expected[i], docs[i].doc)
36
+ end
37
+ if options[:limit] == :all and options[:offset] == nil
38
+ assert_equal(expected.sort, @searcher.scan(query))
39
+ end
40
+ end
41
+
42
+ def test_offset
43
+ tq = TermQuery.new(:field, "word1")
44
+ tq.boost = 100
45
+ top_docs = @searcher.search(tq, {:limit => 100})
46
+ expected = []
47
+ top_docs.hits.each do |sd|
48
+ expected << sd.doc
49
+ end
50
+
51
+ assert_raise(ArgumentError) { @searcher.search(tq, {:offset => -1}) }
52
+ assert_raise(ArgumentError) { @searcher.search(tq, {:limit => 0}) }
53
+ assert_raise(ArgumentError) { @searcher.search(tq, {:limit => -1}) }
54
+
55
+ check_docs(tq, {:limit => 8, :offset => 0}, expected[0,8])
56
+ check_docs(tq, {:limit => 3, :offset => 1}, expected[1,3])
57
+ check_docs(tq, {:limit => 6, :offset => 2}, expected[2,6])
58
+ check_docs(tq, {:limit => 2, :offset => expected.length}, [])
59
+ check_docs(tq, {:limit => 2, :offset => expected.length + 100}, [])
60
+ check_docs(tq, {:limit => :all}, expected)
61
+ check_docs(tq, {:limit => :all, :offset => 2}, expected[2..-1])
62
+ end
63
+
64
+ def test_multi_term_query
65
+ mtq = MultiTermQuery.new(:field, :max_terms => 4, :min_score => 0.5)
66
+ check_hits(mtq, [])
67
+ assert_equal('""', mtq.to_s(:field))
68
+ assert_equal('field:""', mtq.to_s)
69
+
70
+ [
71
+ ["brown", 1.0, '"brown"'],
72
+ ["fox", 0.1, '"brown"'],
73
+ ["fox", 0.6, '"fox^0.6|brown"'],
74
+ ["fast", 50.0, '"fox^0.6|brown|fast^50.0"']
75
+ ].each do |term, boost, str|
76
+ mtq.add_term(term, boost)
77
+ assert_equal(str, mtq.to_s(:field))
78
+ assert_equal("field:#{str}", mtq.to_s())
79
+ end
80
+
81
+ mtq.boost = 80.1
82
+ assert_equal('field:"fox^0.6|brown|fast^50.0"^80.1', mtq.to_s())
83
+ mtq << "word1"
84
+ assert_equal('field:"fox^0.6|brown|word1|fast^50.0"^80.1', mtq.to_s())
85
+ mtq << "word2"
86
+ assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
87
+ mtq << "word3"
88
+ assert_equal('field:"brown|word1|word2|fast^50.0"^80.1', mtq.to_s())
89
+
90
+ terms = mtq.terms(@searcher)
91
+ assert(terms.index(Ferret::Term.new(:field, "brown")))
92
+ assert(terms.index(Ferret::Term.new(:field, "word1")))
93
+ assert(terms.index(Ferret::Term.new(:field, "word2")))
94
+ assert(terms.index(Ferret::Term.new(:field, "fast")))
95
+ end
96
+
97
+ def test_boolean_query
98
+ bq = BooleanQuery.new()
99
+ tq1 = TermQuery.new(:field, "word1")
100
+ tq2 = TermQuery.new(:field, "word3")
101
+ bq.add_query(tq1, :must)
102
+ bq.add_query(tq2, :must)
103
+ check_hits(bq, [2,3,6,8,11,14], 14)
104
+
105
+ tq3 = TermQuery.new(:field, "word2")
106
+ bq.add_query(tq3, :should)
107
+ check_hits(bq, [2,3,6,8,11,14], 8)
108
+
109
+ bq = BooleanQuery.new()
110
+ bq.add_query(tq2, :must)
111
+ bq.add_query(tq3, :must_not)
112
+ check_hits(bq, [2,3,6,11,14])
113
+
114
+ bq = BooleanQuery.new()
115
+ bq.add_query(tq2, :must_not)
116
+ check_hits(bq, [0,1,4,5,7,9,10,12,13,15,16,17])
117
+
118
+ bq = BooleanQuery.new()
119
+ bq.add_query(tq2, :should)
120
+ bq.add_query(tq3, :should)
121
+ check_hits(bq, [1,2,3,4,6,8,11,14])
122
+
123
+ bq = BooleanQuery.new()
124
+ bc1 = BooleanQuery::BooleanClause.new(tq2, :should)
125
+ bc2 = BooleanQuery::BooleanClause.new(tq3, :should)
126
+ bq << bc1
127
+ bq << bc2
128
+ check_hits(bq, [1,2,3,4,6,8,11,14])
129
+ end
130
+
131
+ def test_phrase_query()
132
+ pq = PhraseQuery.new(:field)
133
+ assert_equal("\"\"", pq.to_s(:field))
134
+ assert_equal("field:\"\"", pq.to_s)
135
+
136
+ pq << "quick" << "brown" << "fox"
137
+ check_hits(pq, [1])
138
+
139
+ pq = PhraseQuery.new(:field, 1)
140
+ pq << "quick"
141
+ pq.add_term("fox", 2)
142
+ check_hits(pq, [1,11,14,16])
143
+
144
+ pq.slop = 0
145
+ check_hits(pq, [1,11,14])
146
+
147
+ pq.slop = 1
148
+ check_hits(pq, [1,11,14,16])
149
+
150
+ pq.slop = 4
151
+ check_hits(pq, [1,11,14,16,17])
152
+ end
153
+
154
+ def test_range_query()
155
+ rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010")
156
+ check_hits(rq, [6,7,8,9,10])
157
+
158
+ rq = RangeQuery.new(:date, :>= => "20051006", :<= => "20051010")
159
+ check_hits(rq, [6,7,8,9,10])
160
+
161
+ rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010",
162
+ :include_lower => false)
163
+ check_hits(rq, [7,8,9,10])
164
+
165
+ rq = RangeQuery.new(:date, :> => "20051006", :<= => "20051010")
166
+ check_hits(rq, [7,8,9,10])
167
+
168
+ rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010",
169
+ :include_upper => false)
170
+ check_hits(rq, [6,7,8,9])
171
+
172
+ rq = RangeQuery.new(:date, :>= => "20051006", :< => "20051010")
173
+ check_hits(rq, [6,7,8,9])
174
+
175
+ rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010",
176
+ :include_lower => false, :include_upper => false)
177
+ check_hits(rq, [7,8,9])
178
+
179
+ rq = RangeQuery.new(:date, :> => "20051006", :< => "20051010")
180
+ check_hits(rq, [7,8,9])
181
+
182
+ rq = RangeQuery.new(:date, :upper => "20051003")
183
+ check_hits(rq, [0,1,2,3])
184
+
185
+ rq = RangeQuery.new(:date, :<= => "20051003")
186
+ check_hits(rq, [0,1,2,3])
187
+
188
+ rq = RangeQuery.new(:date, :upper => "20051003", :include_upper => false)
189
+ check_hits(rq, [0,1,2])
190
+
191
+ rq = RangeQuery.new(:date, :< => "20051003")
192
+ check_hits(rq, [0,1,2])
193
+
194
+ rq = RangeQuery.new(:date, :lower => "20051014")
195
+ check_hits(rq, [14,15,16,17])
196
+
197
+ rq = RangeQuery.new(:date, :>= => "20051014")
198
+ check_hits(rq, [14,15,16,17])
199
+
200
+ rq = RangeQuery.new(:date, :lower => "20051014", :include_lower => false)
201
+ check_hits(rq, [15,16,17])
202
+
203
+ rq = RangeQuery.new(:date, :> => "20051014")
204
+ check_hits(rq, [15,16,17])
205
+ end
206
+
207
+ def test_typed_range_query()
208
+ rq = TypedRangeQuery.new(:number, :>= => "-1.0", :<= => 1.0)
209
+ check_hits(rq, [0,1,4,10,15,17])
210
+
211
+ rq = TypedRangeQuery.new(:number, :> => "-1.0", :< => 1.0)
212
+ check_hits(rq, [0,1,4,15])
213
+
214
+ if ENV['FERRET_DEV']
215
+ # test hexadecimal
216
+ rq = TypedRangeQuery.new(:number, :> => "1.0", :<= =>"0xa")
217
+ check_hits(rq, [6,7,9,12])
218
+ end
219
+
220
+ # test single bound
221
+ rq = TypedRangeQuery.new(:number, :<= => "0.0")
222
+ check_hits(rq, [5,11,15,16,17])
223
+
224
+ # test single bound
225
+ rq = TypedRangeQuery.new(:number, :> => "0.0")
226
+ check_hits(rq, [0,1,2,3,4,6,7,8,9,10,12,13,14])
227
+
228
+ # range with large positive bounds - no results
229
+ rq = TypedRangeQuery.new(:number, :> => "10051006", :< =>"10051010")
230
+ check_hits(rq, [])
231
+
232
+ # range with large negative bounds - no results
233
+ rq = TypedRangeQuery.new(:number, :> => "-12518421", :< =>"-12518420")
234
+ check_hits(rq, [])
235
+ end
236
+
237
+ def test_prefix_query()
238
+ pq = PrefixQuery.new(:category, "cat1")
239
+ check_hits(pq, [0, 1, 2, 3, 4, 13, 14, 15, 16, 17])
240
+
241
+ pq = PrefixQuery.new(:category, "cat1/sub2")
242
+ check_hits(pq, [3, 4, 13, 15])
243
+ end
244
+
245
+ def test_wildcard_query()
246
+ wq = WildcardQuery.new(:category, "cat1*")
247
+ check_hits(wq, [0, 1, 2, 3, 4, 13, 14, 15, 16, 17])
248
+
249
+ wq = WildcardQuery.new(:category, "cat1*/su??ub2")
250
+ check_hits(wq, [4, 16])
251
+
252
+ wq = WildcardQuery.new(:category, "*/sub2*")
253
+ check_hits(wq, [3, 4, 13, 15])
254
+ end
255
+
256
+ def test_multi_phrase_query()
257
+ mpq = PhraseQuery.new(:field)
258
+ mpq << ["quick", "fast"]
259
+ mpq << ["brown", "red", "hairy"]
260
+ mpq << "fox"
261
+ check_hits(mpq, [1, 8, 11, 14])
262
+
263
+ mpq.slop = 4
264
+ check_hits(mpq, [1, 8, 11, 14, 16, 17])
265
+ end
266
+
267
+ def test_highlighter()
268
+ dir = Ferret::Store::RAMDirectory.new
269
+ iw = Ferret::Index::IndexWriter.new(:dir => dir,
270
+ :analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new())
271
+ long_text = "big " + "between " * 2000 + 'house'
272
+ [
273
+ {:field => "the words we are searching for are one and two also " +
274
+ "sometimes looking for them as a phrase like this; one " +
275
+ "two lets see how it goes"},
276
+ {:long => 'before ' * 1000 + long_text + ' after' * 1000},
277
+ {:dates => '20070505 20071230 20060920 20081111'},
278
+ ].each {|doc| iw << doc }
279
+ iw.close
280
+
281
+ searcher = Searcher.new(dir)
282
+
283
+ q = TermQuery.new(:field, "one");
284
+ highlights = searcher.highlight(q, 0, :field,
285
+ :excerpt_length => 10,
286
+ :num_excerpts => 1)
287
+ assert_equal(1, highlights.size)
288
+ assert_equal("...are <b>one</b>...", highlights[0])
289
+
290
+ highlights = searcher.highlight(q, 0, :field,
291
+ :excerpt_length => 10,
292
+ :num_excerpts => 2)
293
+ assert_equal(2, highlights.size)
294
+ assert_equal("...are <b>one</b>...", highlights[0])
295
+ assert_equal("...this; <b>one</b>...", highlights[1])
296
+
297
+ highlights = searcher.highlight(q, 0, :field,
298
+ :excerpt_length => 10,
299
+ :num_excerpts => 3)
300
+ assert_equal(3, highlights.size)
301
+ assert_equal("the words...", highlights[0])
302
+ assert_equal("...are <b>one</b>...", highlights[1])
303
+ assert_equal("...this; <b>one</b>...", highlights[2])
304
+
305
+ highlights = searcher.highlight(q, 0, :field,
306
+ :excerpt_length => 10,
307
+ :num_excerpts => 4)
308
+ assert_equal(3, highlights.size)
309
+ assert_equal("the words we are...", highlights[0])
310
+ assert_equal("...are <b>one</b>...", highlights[1])
311
+ assert_equal("...this; <b>one</b>...", highlights[2])
312
+
313
+ highlights = searcher.highlight(q, 0, :field,
314
+ :excerpt_length => 10,
315
+ :num_excerpts => 5)
316
+ assert_equal(2, highlights.size)
317
+ assert_equal("the words we are searching for are <b>one</b>...", highlights[0])
318
+ assert_equal("...this; <b>one</b>...", highlights[1])
319
+
320
+ highlights = searcher.highlight(q, 0, :field,
321
+ :excerpt_length => 10,
322
+ :num_excerpts => 20)
323
+ assert_equal(1, highlights.size)
324
+ assert_equal("the words we are searching for are <b>one</b> and two also " +
325
+ "sometimes looking for them as a phrase like this; <b>one</b> " +
326
+ "two lets see how it goes", highlights[0])
327
+
328
+ highlights = searcher.highlight(q, 0, :field,
329
+ :excerpt_length => 1000,
330
+ :num_excerpts => 1)
331
+ assert_equal(1, highlights.size)
332
+ assert_equal("the words we are searching for are <b>one</b> and two also " +
333
+ "sometimes looking for them as a phrase like this; <b>one</b> " +
334
+ "two lets see how it goes", highlights[0])
335
+
336
+ q = BooleanQuery.new(false)
337
+ q << TermQuery.new(:field, "one")
338
+ q << TermQuery.new(:field, "two")
339
+
340
+ highlights = searcher.highlight(q, 0, :field,
341
+ :excerpt_length => 15,
342
+ :num_excerpts => 2)
343
+ assert_equal(2, highlights.size)
344
+ assert_equal("...<b>one</b> and <b>two</b>...", highlights[0])
345
+ assert_equal("...this; <b>one</b> <b>two</b>...", highlights[1])
346
+
347
+ q << (PhraseQuery.new(:field) << "one" << "two")
348
+
349
+ highlights = searcher.highlight(q, 0, :field,
350
+ :excerpt_length => 15,
351
+ :num_excerpts => 2)
352
+ assert_equal(2, highlights.size)
353
+ assert_equal("...<b>one</b> and <b>two</b>...", highlights[0])
354
+ assert_equal("...this; <b>one two</b>...", highlights[1])
355
+
356
+ highlights = searcher.highlight(q, 0, :field,
357
+ :excerpt_length => 15,
358
+ :num_excerpts => 1)
359
+ assert_equal(1, highlights.size)
360
+ # should have a higher priority since it is the merger of three matches
361
+ assert_equal("...this; <b>one two</b>...", highlights[0])
362
+
363
+ highlights = searcher.highlight(q, 0, :not_a_field,
364
+ :excerpt_length => 15,
365
+ :num_excerpts => 1)
366
+ assert_nil(highlights)
367
+
368
+ q = TermQuery.new(:wrong_field, "one")
369
+ highlights = searcher.highlight(q, 0, :wrong_field,
370
+ :excerpt_length => 15,
371
+ :num_excerpts => 1)
372
+ assert_nil(highlights)
373
+
374
+ q = BooleanQuery.new(false)
375
+ q << (PhraseQuery.new(:field) << "the" << "words")
376
+ q << (PhraseQuery.new(:field) << "for" << "are" << "one" << "and" << "two")
377
+ q << TermQuery.new(:field, "words")
378
+ q << TermQuery.new(:field, "one")
379
+ q << TermQuery.new(:field, "two")
380
+
381
+ highlights = searcher.highlight(q, 0, :field,
382
+ :excerpt_length => 10,
383
+ :num_excerpts => 1)
384
+ assert_equal(1, highlights.size)
385
+ assert_equal("<b>the words</b>...", highlights[0])
386
+
387
+ highlights = searcher.highlight(q, 0, :field,
388
+ :excerpt_length => 10,
389
+ :num_excerpts => 2)
390
+ assert_equal(2, highlights.size)
391
+ assert_equal("<b>the words</b>...", highlights[0])
392
+ assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
393
+
394
+ [
395
+ [RangeQuery.new(:dates, :>= => '20081111'),
396
+ '20070505 20071230 20060920 <b>20081111</b>'],
397
+ [RangeQuery.new(:dates, :>= => '20070101'),
398
+ '<b>20070505</b> <b>20071230</b> 20060920 <b>20081111</b>'],
399
+ [PrefixQuery.new(:dates, '2007'),
400
+ '<b>20070505</b> <b>20071230</b> 20060920 20081111'],
401
+ ].each do |query, expected|
402
+ assert_equal([expected],
403
+ searcher.highlight(query, 2, :dates))
404
+ end
405
+
406
+ #q = PhraseQuery.new(:long) << 'big' << 'house'
407
+ #q.slop = 4000
408
+ #highlights = searcher.highlight(q, 1, :long,
409
+ # :excerpt_length => 400,
410
+ # :num_excerpts => 2)
411
+ #assert_equal(1, highlights.size)
412
+ #puts highlights[0]
413
+ #assert_equal("<b>the words</b>...", highlights[0])
414
+ #assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
415
+ end
416
+
417
+ def test_highlighter_with_standard_analyzer()
418
+ dir = Ferret::Store::RAMDirectory.new
419
+ iw = Ferret::Index::IndexWriter.new(:dir => dir,
420
+ :analyzer => Ferret::Analysis::StandardAnalyzer.new())
421
+ [
422
+ {:field => "field has a url http://ferret.davebalmain.com/trac/ end"},
423
+ ].each {|doc| iw << doc }
424
+ iw.close
425
+
426
+ searcher = Searcher.new(dir)
427
+
428
+ q = TermQuery.new(:field, "ferret.davebalmain.com/trac");
429
+ highlights = searcher.highlight(q, 0, :field,
430
+ :excerpt_length => 1000,
431
+ :num_excerpts => 1)
432
+ assert_equal(1, highlights.size)
433
+ assert_equal("field has a url <b>http://ferret.davebalmain.com/trac/</b> end",
434
+ highlights[0])
435
+ end
436
+ end