ferret 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
@@ -44,8 +44,74 @@ class SearchAndSortTest < Test::Unit::TestCase
44
44
  top_docs.total_hits.times do |i|
45
45
  assert_equal(expected[i], top_docs.score_docs[i].doc)
46
46
  end
47
+
48
+ # test sorting works for smaller ranged query
49
+ first_doc = 3
50
+ num_docs = 3
51
+ top_docs = is.search(query, {:sort => sort,
52
+ :first_doc => first_doc,
53
+ :num_docs => num_docs})
54
+ num_docs.times do |i|
55
+ assert_equal(expected[first_doc + i], top_docs.score_docs[i].doc)
56
+ end
57
+ end
58
+
59
+ def test_sort_field_to_s()
60
+ assert_equal("<SCORE>", SortField::FIELD_SCORE.to_s);
61
+ sf = SortField.new("MyScore",
62
+ {:sort_type => SortField::SortType::SCORE,
63
+ :reverse => true})
64
+ assert_equal("MyScore:<SCORE>!", sf.to_s)
65
+ assert_equal("<DOC>", SortField::FIELD_DOC.to_s);
66
+ sf = SortField.new("MyDoc",
67
+ {:sort_type => SortField::SortType::DOC,
68
+ :reverse => true})
69
+ assert_equal("MyDoc:<DOC>!", sf.to_s)
70
+ sf = SortField.new("date",
71
+ {:sort_type => SortField::SortType::INTEGER})
72
+ assert_equal("date:<integer>", sf.to_s)
73
+ sf = SortField.new("date",
74
+ {:sort_type => SortField::SortType::INTEGER,
75
+ :reverse => true})
76
+ assert_equal("date:<integer>!", sf.to_s)
77
+ sf = SortField.new("price",
78
+ {:sort_type => SortField::SortType::FLOAT})
79
+ assert_equal("price:<float>", sf.to_s)
80
+ sf = SortField.new("price",
81
+ {:sort_type => SortField::SortType::FLOAT,
82
+ :reverse => true})
83
+ assert_equal("price:<float>!", sf.to_s)
84
+ sf = SortField.new("content",
85
+ {:sort_type => SortField::SortType::STRING})
86
+ assert_equal("content:<string>", sf.to_s)
87
+ sf = SortField.new("content",
88
+ {:sort_type => SortField::SortType::STRING,
89
+ :reverse => true})
90
+ assert_equal("content:<string>!", sf.to_s)
91
+ sf = SortField.new("auto_field",
92
+ {:sort_type => SortField::SortType::AUTO})
93
+ assert_equal("auto_field:<auto>", sf.to_s)
94
+ sf = SortField.new("auto_field",
95
+ {:sort_type => SortField::SortType::AUTO,
96
+ :reverse => true})
97
+ assert_equal("auto_field:<auto>!", sf.to_s)
98
+ end
99
+
100
+ def test_sort_to_s()
101
+ sort = Sort.new
102
+ assert_equal("Sort[<SCORE>, <DOC>]", sort.to_s)
103
+ sf = SortField.new("auto_field",
104
+ {:sort_type => SortField::SortType::AUTO,
105
+ :reverse => true})
106
+ sort = Sort.new([sf, SortField::FIELD_SCORE, SortField::FIELD_DOC])
107
+ assert_equal("Sort[auto_field:<auto>!, <SCORE>, <DOC>]", sort.to_s)
108
+ sort = Sort.new(["one", "two", SortField::FIELD_DOC])
109
+ assert_equal("Sort[one:<auto>, two:<auto>, <DOC>]", sort.to_s)
110
+ sort = Sort.new(["one", "two"])
111
+ assert_equal("Sort[one:<auto>, two:<auto>, <DOC>]", sort.to_s)
47
112
  end
48
113
 
114
+
49
115
  def test_sorts()
50
116
  is = IndexSearcher.new(@dir)
51
117
  q = TermQuery.new(Term.new("search", "findall"))
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.9.1
7
- date: 2006-04-11 00:00:00 +09:00
6
+ version: 0.9.2
7
+ date: 2006-05-11 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib
@@ -30,9 +30,9 @@ authors:
30
30
  files:
31
31
  - setup.rb
32
32
  - Rakefile
33
+ - TODO
33
34
  - README
34
35
  - MIT-LICENSE
35
- - TODO
36
36
  - TUTORIAL
37
37
  - CHANGELOG
38
38
  - ext/ferret.c
@@ -51,7 +51,6 @@ files:
51
51
  - ext/document.c
52
52
  - ext/compound_io.c
53
53
  - ext/index_rw.c
54
- - ext/termdocs.c
55
54
  - ext/vector.c
56
55
  - ext/field.c
57
56
  - ext/term.c
@@ -60,20 +59,20 @@ files:
60
59
  - ext/q_boolean.c
61
60
  - ext/q_match_all.c
62
61
  - ext/q_phrase.c
63
- - ext/q_fuzzy.c
62
+ - ext/q_filtered_query.c
64
63
  - ext/search.c
65
64
  - ext/dummy.exe
66
- - ext/q_multi_phrase.c
65
+ - ext/q_fuzzy.c
67
66
  - ext/q_wildcard.c
68
67
  - ext/ind.c
69
68
  - ext/q_range.c
69
+ - ext/q_multi_phrase.c
70
70
  - ext/q_prefix.c
71
71
  - ext/q_span.c
72
72
  - ext/filter.c
73
73
  - ext/similarity.c
74
- - ext/sort.c
75
74
  - ext/q_term.c
76
- - ext/q_filtered_query.c
75
+ - ext/sort.c
77
76
  - ext/index_io.c
78
77
  - ext/fs_store.c
79
78
  - ext/ram_store.c
@@ -86,22 +85,6 @@ files:
86
85
  - ext/hash.c
87
86
  - ext/except.c
88
87
  - ext/priorityqueue.c
89
- - ext/document.h
90
- - ext/store.h
91
- - ext/array.h
92
- - ext/priorityqueue.h
93
- - ext/hashset.h
94
- - ext/helper.h
95
- - ext/global.h
96
- - ext/lang.h
97
- - ext/bitvector.h
98
- - ext/analysis.h
99
- - ext/hash.h
100
- - ext/search.h
101
- - ext/ferret.h
102
- - ext/index.h
103
- - ext/except.h
104
- - ext/similarity.h
105
88
  - ext/libstemmer.h
106
89
  - ext/libstemmer.c
107
90
  - ext/modules.h
@@ -112,6 +95,7 @@ files:
112
95
  - ext/stem_ISO_8859_1_italian.c
113
96
  - ext/stem_UTF_8_portuguese.c
114
97
  - ext/stem_UTF_8_portuguese.h
98
+ - ext/ferret.h
115
99
  - ext/stem_UTF_8_french.c
116
100
  - ext/stem_UTF_8_spanish.c
117
101
  - ext/stem_UTF_8_dutch.c
@@ -130,7 +114,6 @@ files:
130
114
  - ext/stem_ISO_8859_1_portuguese.c
131
115
  - ext/stem_UTF_8_russian.c
132
116
  - ext/stem_ISO_8859_1_spanish.c
133
- - ext/tags
134
117
  - ext/stem_ISO_8859_1_french.c
135
118
  - ext/stem_ISO_8859_1_portuguese.h
136
119
  - ext/stem_ISO_8859_1_dutch.c
@@ -140,6 +123,7 @@ files:
140
123
  - ext/stem_ISO_8859_1_spanish.h
141
124
  - ext/stem_ISO_8859_1_french.h
142
125
  - ext/stem_ISO_8859_1_porter.c
126
+ - ext/tags
143
127
  - ext/stem_ISO_8859_1_dutch.h
144
128
  - ext/stem_UTF_8_finnish.c
145
129
  - ext/stem_KOI8_R_russian.h
@@ -162,6 +146,24 @@ files:
162
146
  - ext/stem_ISO_8859_1_danish.h
163
147
  - ext/stem_UTF_8_english.h
164
148
  - ext/stem_UTF_8_norwegian.h
149
+ - ext/document.h
150
+ - ext/store.h
151
+ - ext/array.h
152
+ - ext/priorityqueue.h
153
+ - ext/hashset.h
154
+ - ext/helper.h
155
+ - ext/global.h
156
+ - ext/bitvector.h
157
+ - ext/analysis.h
158
+ - ext/hash.h
159
+ - ext/search.h
160
+ - ext/similarity.h
161
+ - ext/index.h
162
+ - ext/except.h
163
+ - ext/lang.h
164
+ - ext/frtio.h
165
+ - ext/w32_io.c
166
+ - ext/nix_io.c
165
167
  - ext/inc/lang.h
166
168
  - ext/inc/except.h
167
169
  - lib/ferret.rb
@@ -257,6 +259,7 @@ files:
257
259
  - lib/ferret/search/range_filter.rb
258
260
  - lib/ferret/search/field_cache.rb
259
261
  - lib/ferret/search/match_all_query.rb
262
+ - lib/ferret/search/multi_searcher.rb
260
263
  - lib/ferret/search/spans/near_spans_enum.rb
261
264
  - lib/ferret/search/spans/span_first_query.rb
262
265
  - lib/ferret/search/spans/spans_enum.rb
@@ -345,13 +348,14 @@ files:
345
348
  - test/unit/document/rtc_field.rb
346
349
  - test/unit/document/tc_document.rb
347
350
  - test/unit/query_parser/tc_query_parser.rb
348
- - test/unit/query_parser/rtc_query_parser.rb
349
351
  - test/unit/search/tc_fuzzy_query.rb
352
+ - test/unit/search/tc_multi_searcher2.rb
350
353
  - test/unit/search/tc_index_searcher.rb
351
354
  - test/unit/search/tc_spans.rb
352
355
  - test/unit/search/tc_filter.rb
353
356
  - test/unit/search/tc_sort.rb
354
357
  - test/unit/search/tc_sort_field.rb
358
+ - test/unit/search/tc_multi_searcher.rb
355
359
  - test/unit/search/rtc_sort_field.rb
356
360
  - test/unit/search/rtc_similarity.rb
357
361
  - test/unit/search/tc_search_and_sort.rb
@@ -364,6 +368,7 @@ files:
364
368
  - test/utils/number_to_spoken.rb
365
369
  - test/unit/analysis/data/wordfile
366
370
  - rake_utils/code_statistics.rb
371
+ - ext/termdocs.c
367
372
  test_files: []
368
373
 
369
374
  rdoc_options:
@@ -1,138 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
- class QueryParserTest < Test::Unit::TestCase
4
-
5
- def test_strings()
6
- parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
7
- pairs = [
8
- ['', ''],
9
- ['word', 'word'],
10
- ['field:word', 'field:word'],
11
- ['"word1 word2 word3"', '"word word word"'],
12
- ['"word1 2342 word3"', '"word word"'],
13
- ['field:"one two three"', 'field:"one two three"'],
14
- ['field:"one 222 three"', 'field:"one three"'],
15
- ['field:"one <> three"', 'field:"one <> three"'],
16
- ['field:"one <> three <>"', 'field:"one <> three"'],
17
- ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
18
- ['field:"one <> <> <> three|four|five <>"', 'field:"one <> <> <> three|four|five"'],
19
- ['field:"one|two three|four|five six|seven"', 'field:"one|two three|four|five six|seven"'],
20
- ['contents:"testing|trucks"', 'contents:testing contents:trucks'],
21
- ['[aaa bbb]', '[aaa bbb]'],
22
- ['{aaa bbb]', '{aaa bbb]'],
23
- ['field:[aaa bbb}', 'field:[aaa bbb}'],
24
- ['{aaa bbb}', '{aaa bbb}'],
25
- ['{aaa>', '{aaa>'],
26
- ['[aaa>', '[aaa>'],
27
- ['field:<aaa}', 'field:<aaa}'],
28
- ['<aaa]', '<aaa]'],
29
- ['>aaa', '{aaa>'],
30
- ['>=aaa', '[aaa>'],
31
- ['<aaa', '<aaa}'],
32
- ['field:<=aaa', 'field:<aaa]'],
33
- ['REQ one REQ two', '+one +two'],
34
- ['REQ one two', '+one two'],
35
- ['one REQ two', 'one +two'],
36
- ['+one +two', '+one +two'],
37
- ['+one two', '+one two'],
38
- ['one +two', 'one +two'],
39
- ['-one -two', '-one -two'],
40
- ['-one two', '-one two'],
41
- ['one -two', 'one -two'],
42
- ['!one !two', '-one -two'],
43
- ['!one two', '-one two'],
44
- ['one !two', 'one -two'],
45
- ['NOT one NOT two', '-one -two'],
46
- ['NOT one two', '-one two'],
47
- ['one NOT two', 'one -two'],
48
- ['one two', 'one two'],
49
- ['one OR two', 'one two'],
50
- ['one AND two', '+one +two'],
51
- ['one two AND three', 'one two +three'],
52
- ['one two OR three', 'one two three'],
53
- ['one (two AND three)', 'one (+two +three)'],
54
- ['one AND (two OR three)', '+one +(two three)'],
55
- ['field:(one AND (two OR three))', '+field:one +(field:two field:three)'],
56
- ['one AND (two OR [aaa vvv})', '+one +(two [aaa vvv})'],
57
- ['one AND (one:two OR two:three) AND four', '+one +(one:two two:three) +four'],
58
- ['one^1.23', 'one^1.23'],
59
- ['(one AND two)^100.23', '(+one +two)^100.23'],
60
- ['field:(one AND two)^100.23', '(+field:one +field:two)^100.23'],
61
- ['field:(one AND [aaa bbb]^23.3)^100.23', '(+field:one +field:[aaa bbb]^23.3)^100.23'],
62
- ['(REQ field:"one two three")^23', 'field:"one two three"^23.0'],
63
- ['asdf~0.2', 'asdf~0.2'],
64
- ['field:asdf~0.2', 'field:asdf~0.2'],
65
- ['asdf~0.2^100.0', 'asdf~0.2^100.0'],
66
- ['field:asdf~0.2^0.1', 'field:asdf~0.2^0.1'],
67
- ['field:"asdf <> asdf|asdf"~4', 'field:"asdf <> asdf|asdf"~4'],
68
- ['"one two three four five"~5', '"one two three four five"~5'],
69
- ['ab?de', 'ab?de'],
70
- ['ab*de', 'ab*de'],
71
- ['asdf?*?asd*dsf?asfd*asdf?', 'asdf?*?asd*dsf?asfd*asdf?'],
72
- ['field:a* AND field:(b*)', '+field:a* +field:b*'],
73
- ['field:abc~ AND field:(b*)', '+field:abc~ +field:b*'],
74
- ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
75
-
76
- ['*:xxx', 'f1:xxx f2:xxx f3:xxx'],
77
- ['f1|f2:xxx', 'f1:xxx f2:xxx'],
78
-
79
- ['*:asd~0.2', 'f1:asd~0.2 f2:asd~0.2 f3:asd~0.2'],
80
- ['f1|f2:asd~0.2', 'f1:asd~0.2 f2:asd~0.2'],
81
-
82
- ['*:a?d*^20.0', '(f1:a?d* f2:a?d* f3:a?d*)^20.0'],
83
- ['f1|f2:a?d*^20.0', '(f1:a?d* f2:a?d*)^20.0'],
84
-
85
- ['*:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy" f3:"asdf <> xxx|yyy"'],
86
- ['f1|f2:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
87
-
88
- ['*:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx] f3:[bbb xxx]'],
89
- ['f1|f2:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx]'],
90
-
91
- ['*:(xxx AND bbb)', '+(f1:xxx f2:xxx f3:xxx) +(f1:bbb f2:bbb f3:bbb)'],
92
- ['f1|f2:(xxx AND bbb)', '+(f1:xxx f2:xxx) +(f1:bbb f2:bbb)'],
93
- ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
94
- ['"onewordphrase"', 'onewordphrase']
95
- ]
96
-
97
- pairs.each do |query_str, expected|
98
- assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
99
- end
100
- end
101
-
102
- def test_qp_with_standard_analyzer()
103
- parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"],
104
- :analyzer => Ferret::Analysis::StandardAnalyzer.new)
105
- pairs = [
106
- ['key:1234', 'key:1234'],
107
- ['key:(1234)', 'key:1234']
108
- ]
109
-
110
- pairs.each do |query_str, expected|
111
- assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
112
- end
113
- end
114
-
115
- def do_test_query_parse_exception_raised(str)
116
- parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
117
- assert_raise(Ferret::QueryParser::QueryParseException) do
118
- parser.parse(str)
119
- end
120
- end
121
-
122
-
123
- def test_bad_queries
124
- parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2"],
125
- :handle_parse_errors => true)
126
-
127
- pairs = [
128
- ['::*word', 'word'],
129
- ['()*&)(*^&*(', ''],
130
- ['()*&one)(*two(*&"', '"one two"']
131
- ]
132
-
133
- pairs.each do |query_str, expected|
134
- do_test_query_parse_exception_raised(query_str)
135
- assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
136
- end
137
- end
138
- end