jk-ferret 0.11.8.2

Files changed (228)
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/test/unit/query_parser/tc_query_parser.rb
@@ -0,0 +1,238 @@
+ require File.dirname(__FILE__) + "/../../test_helper"
+
+ class QueryParserTest < Test::Unit::TestCase
+   include Ferret::Analysis
+
+   def test_strings()
+     parser = Ferret::QueryParser.new(:default_field => "xxx",
+         :fields => ["xxx", "field", "f1", "f2"],
+         :tokenized_fields => ["xxx", "f1", "f2"])
+     pairs = [
+       ['', ''],
+       ['*:word', 'word field:word f1:word f2:word'],
+       ['word', 'word'],
+       ['field:word', 'field:word'],
+       ['"word1 word2 word#"', '"word1 word2 word"'],
+       ['"word1 %%% word3"', '"word1 <> word3"~1'],
+       ['field:"one two three"', 'field:"one two three"'],
+       ['field:"one %%% three"', 'field:"one %%% three"'],
+       ['f1:"one %%% three"', 'f1:"one <> three"~1'],
+       ['field:"one <> three"', 'field:"one <> three"'],
+       ['field:"one <> three <>"', 'field:"one <> three"'],
+       ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
+       ['field:"one <> 222 <> three|four|five <>"', 'field:"one <> 222 <> three|four|five"'],
+       ['field:"on1|tw2 THREE|four|five six|seven"', 'field:"on1|tw2 THREE|four|five six|seven"'],
+       ['field:"testing|trucks"', 'field:"testing|trucks"'],
+       ['[aaa bbb]', '[aaa bbb]'],
+       ['{aaa bbb]', '{aaa bbb]'],
+       ['field:[aaa bbb}', 'field:[aaa bbb}'],
+       ['{aaa bbb}', '{aaa bbb}'],
+       ['{aaa>', '{aaa>'],
+       ['[aaa>', '[aaa>'],
+       ['field:<a\ aa}', 'field:<a aa}'],
+       ['<aaa]', '<aaa]'],
+       ['>aaa', '{aaa>'],
+       ['>=aaa', '[aaa>'],
+       ['<aaa', '<aaa}'],
+       ['[A>', '[a>'],
+       ['field:<=aaa', 'field:<aaa]'],
+       ['REQ one REQ two', '+one +two'],
+       ['REQ one two', '+one two'],
+       ['one REQ two', 'one +two'],
+       ['+one +two', '+one +two'],
+       ['+one two', '+one two'],
+       ['one +two', 'one +two'],
+       ['-one -two', '-one -two'],
+       ['-one two', '-one two'],
+       ['one -two', 'one -two'],
+       ['!one !two', '-one -two'],
+       ['!one two', '-one two'],
+       ['one !two', 'one -two'],
+       ['NOT one NOT two', '-one -two'],
+       ['NOT one two', '-one two'],
+       ['one NOT two', 'one -two'],
+       ['NOT two', '-two +*'],
+       ['one two', 'one two'],
+       ['one OR two', 'one two'],
+       ['one AND two', '+one +two'],
+       ['one two AND three', 'one two +three'],
+       ['one two OR three', 'one two three'],
+       ['one (two AND three)', 'one (+two +three)'],
+       ['one AND (two OR three)', '+one +(two three)'],
+       ['field:(one AND (two OR three))', '+field:one +(field:two field:three)'],
+       ['one AND (two OR [aaa vvv})', '+one +(two [aaa vvv})'],
+       ['one AND (f1:two OR f2:three) AND four', '+one +(f1:two f2:three) +four'],
+       ['one^1.23', 'one^1.23'],
+       ['(one AND two)^100.23', '(+one +two)^100.23'],
+       ['field:(one AND two)^100.23', '(+field:one +field:two)^100.23'],
+       ['field:(one AND [aaa bbb]^23.3)^100.23', '(+field:one +field:[aaa bbb]^23.3)^100.23'],
+       ['(REQ field:"one two three")^23', 'field:"one two three"^23.0'],
+       ['asdf~0.2', 'asdf~0.2'],
+       ['field:asdf~0.2', 'field:asdf~0.2'],
+       ['asdf~0.2^100.0', 'asdf~0.2^100.0'],
+       ['field:asdf~0.2^0.1', 'field:asdf~0.2^0.1'],
+       ['field:"asdf <> asdf|asdf"~4', 'field:"asdf <> asdf|asdf"~4'],
+       ['"one two three four five"~5', '"one two three four five"~5'],
+       ['ab?de', 'ab?de'],
+       ['ab*de', 'ab*de'],
+       ['asdf?*?asd*dsf?asfd*asdf?', 'asdf?*?asd*dsf?asfd*asdf?'],
+       ['field:a* AND field:(b*)', '+field:a* +field:b*'],
+       ['field:abc~ AND field:(b*)', '+field:abc~ +field:b*'],
+       ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
+
+       ['*:xxx', 'xxx field:xxx f1:xxx f2:xxx'],
+       ['f1|f2:xxx', 'f1:xxx f2:xxx'],
+
+       ['*:asd~0.2', 'asd~0.2 field:asd~0.2 f1:asd~0.2 f2:asd~0.2'],
+       ['f1|f2:asd~0.2', 'f1:asd~0.2 f2:asd~0.2'],
+
+       ['*:a?d*^20.0', '(a?d* field:a?d* f1:a?d* f2:a?d*)^20.0'],
+       ['f1|f2:a?d*^20.0', '(f1:a?d* f2:a?d*)^20.0'],
+
+       ['*:"asdf <> xxx|yyy"', '"asdf <> xxx|yyy" field:"asdf <> xxx|yyy" f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
+       ['f1|f2:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
+       ['f1|f2:"asdf <> do|yyy"', 'f1:"asdf <> yyy" f2:"asdf <> yyy"'],
+       ['f1|f2:"do|cat"', 'f1:cat f2:cat'],
+
+       ['*:[bbb xxx]', '[bbb xxx] field:[bbb xxx] f1:[bbb xxx] f2:[bbb xxx]'],
+       ['f1|f2:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx]'],
+
+       ['*:(xxx AND bbb)', '+(xxx field:xxx f1:xxx f2:xxx) +(bbb field:bbb f1:bbb f2:bbb)'],
+       ['f1|f2:(xxx AND bbb)', '+(f1:xxx f2:xxx) +(f1:bbb f2:bbb)'],
+       ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
+       ['"onewordphrase"', 'onewordphrase'],
+       ["who'd", "who'd"]
+     ]
+
+     pairs.each do |query_str, expected|
+       assert_equal(expected, parser.parse(query_str).to_s("xxx"))
+     end
+   end
+
+   def test_qp_with_standard_analyzer()
+     parser = Ferret::QueryParser.new(:default_field => "xxx",
+         :fields => ["xxx", "key"],
+         :analyzer => StandardAnalyzer.new)
+     pairs = [
+       ['key:1234', 'key:1234'],
+       ['key:(1234 and Dave)', 'key:1234 key:dave'],
+       ['key:(1234)', 'key:1234'],
+       ['and the but they with', '']
+     ]
+
+     pairs.each do |query_str, expected|
+       assert_equal(expected, parser.parse(query_str).to_s("xxx"))
+     end
+
+   end
+
+   def test_qp_changing_fields()
+     parser = Ferret::QueryParser.new(:default_field => "xxx",
+         :fields => ["xxx", "key"],
+         :analyzer => WhiteSpaceAnalyzer.new)
+     assert_equal('word key:word', parser.parse("*:word").to_s("xxx"))
+
+     parser.fields = ["xxx", "one", "two", "three"]
+     assert_equal('word one:word two:word three:word',
+         parser.parse("*:word").to_s("xxx"))
+     assert_equal('three:word four:word',
+         parser.parse("three:word four:word").to_s("xxx"))
+   end
+
+   def test_qp_allow_any_field()
+     parser = Ferret::QueryParser.new(:default_field => "xxx",
+         :fields => ["xxx", "key"],
+         :analyzer => WhiteSpaceAnalyzer.new,
+         :validate_fields => true)
+
+     assert_equal('key:word',
+         parser.parse("key:word song:word").to_s("xxx"))
+     assert_equal('word key:word', parser.parse("*:word").to_s("xxx"))
+
+
+     parser = Ferret::QueryParser.new(:default_field => "xxx",
+         :fields => ["xxx", "key"],
+         :analyzer => WhiteSpaceAnalyzer.new)
+
+     assert_equal('key:word song:word',
+         parser.parse("key:word song:word").to_s("xxx"))
+     assert_equal('word key:word', parser.parse("*:word").to_s("xxx"))
+   end
+
+   def do_test_query_parse_exception_raised(str)
+     parser = Ferret::QueryParser.new(:default_field => "xxx",
+         :fields => ["f1", "f2", "f3"],
+         :handle_parse_errors => false)
+     assert_raise(Ferret::QueryParser::QueryParseException,
+         str + " should have failed") do
+       parser.parse(str)
+     end
+   end
+
+   def test_or_default
+     parser = Ferret::QueryParser.new(:default_field => :*,
+         :fields => [:x, :y],
+         :or_default => false,
+         :analyzer => StandardAnalyzer.new)
+     pairs = [
+       ['word', 'x:word y:word'],
+       ['word1 word2', '+(x:word1 y:word1) +(x:word2 y:word2)']
+     ]
+
+     pairs.each do |query_str, expected|
+       assert_equal(expected, parser.parse(query_str).to_s(""))
+     end
+   end
+
+   def test_prefix_query
+     parser = Ferret::QueryParser.new(:default_field => "xxx",
+         :fields => ["xxx"],
+         :analyzer => StandardAnalyzer.new)
+     assert_equal(Ferret::Search::PrefixQuery, parser.parse("asdg*").class)
+     assert_equal(Ferret::Search::WildcardQuery, parser.parse("a?dg*").class)
+     assert_equal(Ferret::Search::WildcardQuery, parser.parse("a*dg*").class)
+     assert_equal(Ferret::Search::WildcardQuery, parser.parse("adg*c").class)
+   end
+
+   def test_bad_queries
+     parser = Ferret::QueryParser.new(:default_field => "xxx",
+         :fields => ["f1", "f2"])
+
+     pairs = [
+       ['::*word', 'word'],
+       ['::*&)(*^&*(', ''],
+       ['::*&one)(*two(*&"', '"one two"~1'],
+       [':', ''],
+       ['[, ]', ''],
+       ['{, }', ''],
+       ['!', ''],
+       ['+', ''],
+       ['~', ''],
+       ['^', ''],
+       ['-', ''],
+       ['|', ''],
+       ['<, >', ''],
+       ['=', ''],
+       ['<script>', 'script']
+     ]
+
+     pairs.each do |query_str, expected|
+       do_test_query_parse_exception_raised(query_str)
+       assert_equal(expected, parser.parse(query_str).to_s("xxx"))
+     end
+   end
+
+   def test_use_keywords_switch
+     analyzer = LetterAnalyzer.new
+     parser = Ferret::QueryParser.new(:analyzer => analyzer,
+         :default_field => "xxx")
+     assert_equal("+www (+xxx +yyy) -zzz",
+         parser.parse("REQ www (xxx AND yyy) OR NOT zzz").to_s("xxx"))
+
+     parser = Ferret::QueryParser.new(:analyzer => analyzer,
+         :default_field => "xxx",
+         :use_keywords => false)
+     assert_equal("req www (xxx and yyy) or not zzz",
+         parser.parse("REQ www (xxx AND yyy) OR NOT zzz").to_s("xxx"))
+   end
+ end
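
Note: the test above exercises most of the Ferret::QueryParser options shipped in this gem. As a quick orientation, here is a minimal usage sketch built only from the calls and options shown in that test; the field names, query string, and variable names are invented for illustration and are not part of the gem.

  require 'rubygems'
  require 'ferret'

  # Parse against three known fields; unqualified terms go to :default_field.
  # :or_default => false makes a bare list of terms conjunctive, as shown in
  # test_or_default above.
  parser = Ferret::QueryParser.new(:default_field => "title",
      :fields => ["title", "body", "tags"],
      :analyzer => Ferret::Analysis::StandardAnalyzer.new,
      :or_default => false)

  query = parser.parse('title:ruby AND (search OR index*)')
  puts query.to_s("title")   # inspect how the parser rewrote the query
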
data/test/unit/search/tc_filter.rb
@@ -0,0 +1,156 @@
+ require File.dirname(__FILE__) + "/../../test_helper"
+ require 'date'
+
+
+ class FilterTest < Test::Unit::TestCase
+   include Ferret::Search
+   include Ferret::Analysis
+   include Ferret::Index
+
+   def setup()
+     @dir = Ferret::Store::RAMDirectory.new()
+     iw = IndexWriter.new(:dir => @dir,
+         :analyzer => WhiteSpaceAnalyzer.new(),
+         :create => true)
+     [
+       {:int => "0", :date => "20040601", :switch => "on"},
+       {:int => "1", :date => "20041001", :switch => "off"},
+       {:int => "2", :date => "20051101", :switch => "on"},
+       {:int => "3", :date => "20041201", :switch => "off"},
+       {:int => "4", :date => "20051101", :switch => "on"},
+       {:int => "5", :date => "20041201", :switch => "off"},
+       {:int => "6", :date => "20050101", :switch => "on"},
+       {:int => "7", :date => "20040701", :switch => "off"},
+       {:int => "8", :date => "20050301", :switch => "on"},
+       {:int => "9", :date => "20050401", :switch => "off"}
+     ].each {|doc| iw << doc}
+     iw.close
+   end
+
+   def teardown()
+     @dir.close()
+   end
+
+   def do_test_top_docs(searcher, query, expected, filter)
+     top_docs = searcher.search(query, {:filter => filter})
+     #puts top_docs
+     assert_equal(expected.size, top_docs.hits.size)
+     top_docs.total_hits.times do |i|
+       assert_equal(expected[i], top_docs.hits[i].doc)
+     end
+   end
+
+   def test_range_filter
+     searcher = Searcher.new(@dir)
+     q = MatchAllQuery.new()
+     rf = RangeFilter.new(:int, :>= => "2", :<= => "6")
+     do_test_top_docs(searcher, q, [2,3,4,5,6], rf)
+     rf = RangeFilter.new(:int, :>= => "2", :< => "6")
+     do_test_top_docs(searcher, q, [2,3,4,5], rf)
+     rf = RangeFilter.new(:int, :> => "2", :<= => "6")
+     do_test_top_docs(searcher, q, [3,4,5,6], rf)
+     rf = RangeFilter.new(:int, :> => "2", :< => "6")
+     do_test_top_docs(searcher, q, [3,4,5], rf)
+     rf = RangeFilter.new(:int, :>= => "6")
+     do_test_top_docs(searcher, q, [6,7,8,9], rf)
+     rf = RangeFilter.new(:int, :> => "6")
+     do_test_top_docs(searcher, q, [7,8,9], rf)
+     rf = RangeFilter.new(:int, :<= => "2")
+     do_test_top_docs(searcher, q, [0,1,2], rf)
+     rf = RangeFilter.new(:int, :< => "2")
+     do_test_top_docs(searcher, q, [0,1], rf)
+
+     bits = rf.bits(searcher.reader)
+     assert(bits[0])
+     assert(bits[1])
+     assert(!bits[2])
+     assert(!bits[3])
+     assert(!bits[4])
+   end
+
+   def test_range_filter_errors
+     assert_raise(ArgumentError) {f = RangeFilter.new(:f, :> => "b", :< => "a")}
+     assert_raise(ArgumentError) {f = RangeFilter.new(:f, :include_lower => true)}
+     assert_raise(ArgumentError) {f = RangeFilter.new(:f, :include_upper => true)}
+   end
+
+   def test_query_filter()
+     searcher = Searcher.new(@dir)
+     q = MatchAllQuery.new()
+     qf = QueryFilter.new(TermQuery.new(:switch, "on"))
+     do_test_top_docs(searcher, q, [0,2,4,6,8], qf)
+     # test again to test caching doesn't break it
+     do_test_top_docs(searcher, q, [0,2,4,6,8], qf)
+     qf = QueryFilter.new(TermQuery.new(:switch, "off"))
+     do_test_top_docs(searcher, q, [1,3,5,7,9], qf)
+
+     bits = qf.bits(searcher.reader)
+     assert(bits[1])
+     assert(bits[3])
+     assert(bits[5])
+     assert(bits[7])
+     assert(bits[9])
+     assert(!bits[0])
+     assert(!bits[2])
+     assert(!bits[4])
+     assert(!bits[6])
+     assert(!bits[8])
+   end
+
+   def test_filtered_query
+     searcher = Searcher.new(@dir)
+     q = MatchAllQuery.new()
+     rf = RangeFilter.new(:int, :>= => "2", :<= => "6")
+     rq = FilteredQuery.new(q, rf)
+     qf = QueryFilter.new(TermQuery.new(:switch, "on"))
+     do_test_top_docs(searcher, rq, [2,4,6], qf)
+     query = FilteredQuery.new(rq, qf)
+     rf2 = RangeFilter.new(:int, :>= => "3")
+     do_test_top_docs(searcher, query, [4,6], rf2)
+   end
+
+   class CustomFilter
+     def bits(ir)
+       bv = Ferret::Utils::BitVector.new
+       bv[0] = bv[2] = bv[4] = true
+       bv
+     end
+   end
+
+   def test_custom_filter
+     searcher = Searcher.new(@dir)
+     q = MatchAllQuery.new
+     filt = CustomFilter.new
+     do_test_top_docs(searcher, q, [0, 2, 4], filt)
+   end
+
+   def test_filter_proc
+     searcher = Searcher.new(@dir)
+     q = MatchAllQuery.new()
+     filter_proc = lambda {|doc, score, s| (s[doc][:int] % 2) == 0}
+     top_docs = searcher.search(q, :filter_proc => filter_proc)
+     top_docs.hits.each do |hit|
+       assert_equal(0, searcher[hit.doc][:int] % 2)
+     end
+   end
+
+   def test_score_modifying_filter_proc
+     searcher = Searcher.new(@dir)
+     q = MatchAllQuery.new()
+     start_date = Date.parse('2008-02-08')
+     date_half_life_50 = lambda do |doc, score, s|
+       days = (start_date - Date.parse(s[doc][:date], '%Y%m%d')).to_i
+       1.0 / (2.0 ** (days.to_f / 50.0))
+     end
+     top_docs = searcher.search(q, :filter_proc => date_half_life_50)
+     docs = top_docs.hits.collect {|hit| hit.doc}
+     assert_equal(docs, [2,4,9,8,6,3,5,1,7,0])
+     rev_date_half_life_50 = lambda do |doc, score, s|
+       days = (start_date - Date.parse(s[doc][:date], '%Y%m%d')).to_i
+       1.0 - 1.0 / (2.0 ** (days.to_f / 50.0))
+     end
+     top_docs = searcher.search(q, :filter_proc => rev_date_half_life_50)
+     docs = top_docs.hits.collect {|hit| hit.doc}
+     assert_equal(docs, [0,7,1,3,5,6,8,9,2,4])
+   end
+ end
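
Note: this filter test doubles as a compact reference for the gem's search-time filtering API. The sketch below strings the same pieces together outside Test::Unit; the directory contents, field name, and variable names are invented, while RAMDirectory, IndexWriter, Searcher, MatchAllQuery, RangeFilter, and the :filter search option are exactly the calls used in the test above.

  require 'rubygems'
  require 'ferret'
  include Ferret

  dir = Store::RAMDirectory.new
  iw  = Index::IndexWriter.new(:dir => dir,
      :analyzer => Analysis::WhiteSpaceAnalyzer.new,
      :create => true)
  # Field values are indexed as strings, so range filters compare terms
  # lexicographically; zero-padding keeps the ordering numeric.
  %w(01 02 03 10 11 12).each {|n| iw << {:num => n}}
  iw.close

  searcher = Search::Searcher.new(dir)
  query    = Search::MatchAllQuery.new
  filter   = Search::RangeFilter.new(:num, :>= => "02", :<= => "11")
  searcher.search(query, :filter => filter).hits.each do |hit|
    puts searcher[hit.doc][:num]
  end
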
data/test/unit/search/tc_fuzzy_query.rb
@@ -0,0 +1,147 @@
+ require File.dirname(__FILE__) + "/../../test_helper"
+
+ class FuzzyQueryTest < Test::Unit::TestCase
+   include Ferret::Search
+   include Ferret::Store
+   include Ferret::Analysis
+   include Ferret::Index
+
+   def add_doc(text, writer)
+     writer << {:field => text}
+   end
+
+   def setup()
+     @dir = RAMDirectory.new()
+   end
+
+   def teardown()
+     @dir.close()
+   end
+
+   def do_test_top_docs(is, query, expected)
+     top_docs = is.search(query)
+     assert_equal(expected.length, top_docs.total_hits,
+         "expected #{expected.length} hits but got #{top_docs.total_hits}")
+     assert_equal(expected.length, top_docs.hits.size)
+     top_docs.total_hits.times do |i|
+       assert_equal(expected[i], top_docs.hits[i].doc)
+     end
+   end
+
+   def do_prefix_test(is, text, prefix, expected)
+     fq = FuzzyQuery.new(:field, text, :prefix_length => prefix)
+     #puts is.explain(fq, 0)
+     #puts is.explain(fq, 1)
+     do_test_top_docs(is, fq, expected)
+   end
+
+   def test_fuzziness()
+     iw = IndexWriter.new(:dir => @dir,
+         :analyzer => WhiteSpaceAnalyzer.new(),
+         :create => true)
+     add_doc("aaaaa", iw)
+     add_doc("aaaab", iw)
+     add_doc("aaabb", iw)
+     add_doc("aabbb", iw)
+     add_doc("abbbb", iw)
+     add_doc("bbbbb", iw)
+     add_doc("ddddd", iw)
+     add_doc("ddddddddddddddddddddd", iw) # test max_distances problem
+     add_doc("aaaaaaaaaaaaaaaaaaaaaaa", iw) # test max_distances problem
+     #iw.optimize()
+     iw.close()
+
+
+     is = Searcher.new(@dir)
+
+     fq = FuzzyQuery.new(:field, "aaaaa", :prefix_length => 5)
+
+     do_prefix_test(is, "aaaaaaaaaaaaaaaaaaaaaa", 1, [8])
+     do_prefix_test(is, "aaaaa", 0, [0,1,2])
+     do_prefix_test(is, "aaaaa", 1, [0,1,2])
+     do_prefix_test(is, "aaaaa", 2, [0,1,2])
+     do_prefix_test(is, "aaaaa", 3, [0,1,2])
+     do_prefix_test(is, "aaaaa", 4, [0,1])
+     do_prefix_test(is, "aaaaa", 5, [0])
+     do_prefix_test(is, "aaaaa", 6, [0])
+
+     do_prefix_test(is, "xxxxx", 0, [])
+
+     do_prefix_test(is, "aaccc", 0, [])
+
+     do_prefix_test(is, "aaaac", 0, [0,1,2])
+     do_prefix_test(is, "aaaac", 1, [0,1,2])
+     do_prefix_test(is, "aaaac", 2, [0,1,2])
+     do_prefix_test(is, "aaaac", 3, [0,1,2])
+     do_prefix_test(is, "aaaac", 4, [0,1])
+     do_prefix_test(is, "aaaac", 5, [])
+
+     do_prefix_test(is, "ddddX", 0, [6])
+     do_prefix_test(is, "ddddX", 1, [6])
+     do_prefix_test(is, "ddddX", 2, [6])
+     do_prefix_test(is, "ddddX", 3, [6])
+     do_prefix_test(is, "ddddX", 4, [6])
+     do_prefix_test(is, "ddddX", 5, [])
+
+     fq = FuzzyQuery.new(:anotherfield, "ddddX", :prefix_length => 0)
+     top_docs = is.search(fq)
+     assert_equal(0, top_docs.total_hits)
+
+     is.close()
+   end
+
+   def test_fuzziness_long()
+     iw = IndexWriter.new(:dir => @dir,
+         :analyzer => WhiteSpaceAnalyzer.new(),
+         :create => true)
+     add_doc("aaaaaaa", iw)
+     add_doc("segment", iw)
+     iw.optimize()
+     iw.close()
+     is = Searcher.new(@dir)
+
+     # not similar enough:
+     do_prefix_test(is, "xxxxx", 0, [])
+
+     # edit distance to "aaaaaaa" = 3, this matches because the string is longer than
+     # in testDefaultFuzziness so a bigger difference is allowed:
+     do_prefix_test(is, "aaaaccc", 0, [0])
+
+     # now with prefix
+     do_prefix_test(is, "aaaaccc", 1, [0])
+     do_prefix_test(is, "aaaaccc", 4, [0])
+     do_prefix_test(is, "aaaaccc", 5, [])
+
+     # no match, more than half of the characters is wrong:
+     do_prefix_test(is, "aaacccc", 0, [])
+
+     # now with prefix
+     do_prefix_test(is, "aaacccc", 1, [])
+
+     # "student" and "stellent" are indeed similar to "segment" by default:
+     do_prefix_test(is, "student", 0, [1])
+     do_prefix_test(is, "stellent", 0, [1])
+
+     # now with prefix
+     do_prefix_test(is, "student", 2, [])
+     do_prefix_test(is, "stellent", 2, [])
+
+     # "student" doesn't match anymore thanks to increased minimum similarity:
+     fq = FuzzyQuery.new(:field, "student",
+         :min_similarity => 0.6,
+         :prefix_length => 0)
+
+     top_docs = is.search(fq)
+     assert_equal(0, top_docs.total_hits)
+
+     assert_raise(ArgumentError) do
+       fq = FuzzyQuery.new(:f, "s", :min_similarity => 1.1)
+     end
+     assert_raise(ArgumentError) do
+       fq = FuzzyQuery.new(:f, "s", :min_similarity => -0.1)
+     end
+
+     is.close()
+   end
+
+ end
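
Note: for use outside the test harness, a FuzzyQuery is built directly with the two options these tests rely on, :prefix_length and :min_similarity. The sketch below is a minimal illustration; the indexed words, the misspelled search term, and the variable names are invented and not taken from the gem.

  require 'rubygems'
  require 'ferret'
  include Ferret

  dir = Store::RAMDirectory.new
  iw  = Index::IndexWriter.new(:dir => dir,
      :analyzer => Analysis::WhiteSpaceAnalyzer.new,
      :create => true)
  ["segment", "sediment", "apartment"].each {|word| iw << {:field => word}}
  iw.close

  searcher = Search::Searcher.new(dir)
  # :prefix_length => 1 requires the first character to match exactly;
  # raising :min_similarity prunes more distant terms, and out-of-range
  # values raise ArgumentError (see the assertions in the test above).
  fq = Search::FuzzyQuery.new(:field, "segmant",
      :prefix_length => 1,
      :min_similarity => 0.5)
  searcher.search(fq).hits.each {|hit| puts searcher[hit.doc][:field]}
  searcher.close
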