ferret 0.9.1 → 0.9.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
@@ -44,8 +44,74 @@ class SearchAndSortTest < Test::Unit::TestCase
44
44
  top_docs.total_hits.times do |i|
45
45
  assert_equal(expected[i], top_docs.score_docs[i].doc)
46
46
  end
47
+
48
+ # test sorting works for smaller ranged query
49
+ first_doc = 3
50
+ num_docs = 3
51
+ top_docs = is.search(query, {:sort => sort,
52
+ :first_doc => first_doc,
53
+ :num_docs => num_docs})
54
+ num_docs.times do |i|
55
+ assert_equal(expected[first_doc + i], top_docs.score_docs[i].doc)
56
+ end
57
+ end
58
+
59
+ def test_sort_field_to_s()
60
+ assert_equal("<SCORE>", SortField::FIELD_SCORE.to_s);
61
+ sf = SortField.new("MyScore",
62
+ {:sort_type => SortField::SortType::SCORE,
63
+ :reverse => true})
64
+ assert_equal("MyScore:<SCORE>!", sf.to_s)
65
+ assert_equal("<DOC>", SortField::FIELD_DOC.to_s);
66
+ sf = SortField.new("MyDoc",
67
+ {:sort_type => SortField::SortType::DOC,
68
+ :reverse => true})
69
+ assert_equal("MyDoc:<DOC>!", sf.to_s)
70
+ sf = SortField.new("date",
71
+ {:sort_type => SortField::SortType::INTEGER})
72
+ assert_equal("date:<integer>", sf.to_s)
73
+ sf = SortField.new("date",
74
+ {:sort_type => SortField::SortType::INTEGER,
75
+ :reverse => true})
76
+ assert_equal("date:<integer>!", sf.to_s)
77
+ sf = SortField.new("price",
78
+ {:sort_type => SortField::SortType::FLOAT})
79
+ assert_equal("price:<float>", sf.to_s)
80
+ sf = SortField.new("price",
81
+ {:sort_type => SortField::SortType::FLOAT,
82
+ :reverse => true})
83
+ assert_equal("price:<float>!", sf.to_s)
84
+ sf = SortField.new("content",
85
+ {:sort_type => SortField::SortType::STRING})
86
+ assert_equal("content:<string>", sf.to_s)
87
+ sf = SortField.new("content",
88
+ {:sort_type => SortField::SortType::STRING,
89
+ :reverse => true})
90
+ assert_equal("content:<string>!", sf.to_s)
91
+ sf = SortField.new("auto_field",
92
+ {:sort_type => SortField::SortType::AUTO})
93
+ assert_equal("auto_field:<auto>", sf.to_s)
94
+ sf = SortField.new("auto_field",
95
+ {:sort_type => SortField::SortType::AUTO,
96
+ :reverse => true})
97
+ assert_equal("auto_field:<auto>!", sf.to_s)
98
+ end
99
+
100
+ def test_sort_to_s()
101
+ sort = Sort.new
102
+ assert_equal("Sort[<SCORE>, <DOC>]", sort.to_s)
103
+ sf = SortField.new("auto_field",
104
+ {:sort_type => SortField::SortType::AUTO,
105
+ :reverse => true})
106
+ sort = Sort.new([sf, SortField::FIELD_SCORE, SortField::FIELD_DOC])
107
+ assert_equal("Sort[auto_field:<auto>!, <SCORE>, <DOC>]", sort.to_s)
108
+ sort = Sort.new(["one", "two", SortField::FIELD_DOC])
109
+ assert_equal("Sort[one:<auto>, two:<auto>, <DOC>]", sort.to_s)
110
+ sort = Sort.new(["one", "two"])
111
+ assert_equal("Sort[one:<auto>, two:<auto>, <DOC>]", sort.to_s)
47
112
  end
48
113
 
114
+
49
115
  def test_sorts()
50
116
  is = IndexSearcher.new(@dir)
51
117
  q = TermQuery.new(Term.new("search", "findall"))
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.9.1
7
- date: 2006-04-11 00:00:00 +09:00
6
+ version: 0.9.2
7
+ date: 2006-05-11 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib
@@ -30,9 +30,9 @@ authors:
30
30
  files:
31
31
  - setup.rb
32
32
  - Rakefile
33
+ - TODO
33
34
  - README
34
35
  - MIT-LICENSE
35
- - TODO
36
36
  - TUTORIAL
37
37
  - CHANGELOG
38
38
  - ext/ferret.c
@@ -51,7 +51,6 @@ files:
51
51
  - ext/document.c
52
52
  - ext/compound_io.c
53
53
  - ext/index_rw.c
54
- - ext/termdocs.c
55
54
  - ext/vector.c
56
55
  - ext/field.c
57
56
  - ext/term.c
@@ -60,20 +59,20 @@ files:
60
59
  - ext/q_boolean.c
61
60
  - ext/q_match_all.c
62
61
  - ext/q_phrase.c
63
- - ext/q_fuzzy.c
62
+ - ext/q_filtered_query.c
64
63
  - ext/search.c
65
64
  - ext/dummy.exe
66
- - ext/q_multi_phrase.c
65
+ - ext/q_fuzzy.c
67
66
  - ext/q_wildcard.c
68
67
  - ext/ind.c
69
68
  - ext/q_range.c
69
+ - ext/q_multi_phrase.c
70
70
  - ext/q_prefix.c
71
71
  - ext/q_span.c
72
72
  - ext/filter.c
73
73
  - ext/similarity.c
74
- - ext/sort.c
75
74
  - ext/q_term.c
76
- - ext/q_filtered_query.c
75
+ - ext/sort.c
77
76
  - ext/index_io.c
78
77
  - ext/fs_store.c
79
78
  - ext/ram_store.c
@@ -86,22 +85,6 @@ files:
86
85
  - ext/hash.c
87
86
  - ext/except.c
88
87
  - ext/priorityqueue.c
89
- - ext/document.h
90
- - ext/store.h
91
- - ext/array.h
92
- - ext/priorityqueue.h
93
- - ext/hashset.h
94
- - ext/helper.h
95
- - ext/global.h
96
- - ext/lang.h
97
- - ext/bitvector.h
98
- - ext/analysis.h
99
- - ext/hash.h
100
- - ext/search.h
101
- - ext/ferret.h
102
- - ext/index.h
103
- - ext/except.h
104
- - ext/similarity.h
105
88
  - ext/libstemmer.h
106
89
  - ext/libstemmer.c
107
90
  - ext/modules.h
@@ -112,6 +95,7 @@ files:
112
95
  - ext/stem_ISO_8859_1_italian.c
113
96
  - ext/stem_UTF_8_portuguese.c
114
97
  - ext/stem_UTF_8_portuguese.h
98
+ - ext/ferret.h
115
99
  - ext/stem_UTF_8_french.c
116
100
  - ext/stem_UTF_8_spanish.c
117
101
  - ext/stem_UTF_8_dutch.c
@@ -130,7 +114,6 @@ files:
130
114
  - ext/stem_ISO_8859_1_portuguese.c
131
115
  - ext/stem_UTF_8_russian.c
132
116
  - ext/stem_ISO_8859_1_spanish.c
133
- - ext/tags
134
117
  - ext/stem_ISO_8859_1_french.c
135
118
  - ext/stem_ISO_8859_1_portuguese.h
136
119
  - ext/stem_ISO_8859_1_dutch.c
@@ -140,6 +123,7 @@ files:
140
123
  - ext/stem_ISO_8859_1_spanish.h
141
124
  - ext/stem_ISO_8859_1_french.h
142
125
  - ext/stem_ISO_8859_1_porter.c
126
+ - ext/tags
143
127
  - ext/stem_ISO_8859_1_dutch.h
144
128
  - ext/stem_UTF_8_finnish.c
145
129
  - ext/stem_KOI8_R_russian.h
@@ -162,6 +146,24 @@ files:
162
146
  - ext/stem_ISO_8859_1_danish.h
163
147
  - ext/stem_UTF_8_english.h
164
148
  - ext/stem_UTF_8_norwegian.h
149
+ - ext/document.h
150
+ - ext/store.h
151
+ - ext/array.h
152
+ - ext/priorityqueue.h
153
+ - ext/hashset.h
154
+ - ext/helper.h
155
+ - ext/global.h
156
+ - ext/bitvector.h
157
+ - ext/analysis.h
158
+ - ext/hash.h
159
+ - ext/search.h
160
+ - ext/similarity.h
161
+ - ext/index.h
162
+ - ext/except.h
163
+ - ext/lang.h
164
+ - ext/frtio.h
165
+ - ext/w32_io.c
166
+ - ext/nix_io.c
165
167
  - ext/inc/lang.h
166
168
  - ext/inc/except.h
167
169
  - lib/ferret.rb
@@ -257,6 +259,7 @@ files:
257
259
  - lib/ferret/search/range_filter.rb
258
260
  - lib/ferret/search/field_cache.rb
259
261
  - lib/ferret/search/match_all_query.rb
262
+ - lib/ferret/search/multi_searcher.rb
260
263
  - lib/ferret/search/spans/near_spans_enum.rb
261
264
  - lib/ferret/search/spans/span_first_query.rb
262
265
  - lib/ferret/search/spans/spans_enum.rb
@@ -345,13 +348,14 @@ files:
345
348
  - test/unit/document/rtc_field.rb
346
349
  - test/unit/document/tc_document.rb
347
350
  - test/unit/query_parser/tc_query_parser.rb
348
- - test/unit/query_parser/rtc_query_parser.rb
349
351
  - test/unit/search/tc_fuzzy_query.rb
352
+ - test/unit/search/tc_multi_searcher2.rb
350
353
  - test/unit/search/tc_index_searcher.rb
351
354
  - test/unit/search/tc_spans.rb
352
355
  - test/unit/search/tc_filter.rb
353
356
  - test/unit/search/tc_sort.rb
354
357
  - test/unit/search/tc_sort_field.rb
358
+ - test/unit/search/tc_multi_searcher.rb
355
359
  - test/unit/search/rtc_sort_field.rb
356
360
  - test/unit/search/rtc_similarity.rb
357
361
  - test/unit/search/tc_search_and_sort.rb
@@ -364,6 +368,7 @@ files:
364
368
  - test/utils/number_to_spoken.rb
365
369
  - test/unit/analysis/data/wordfile
366
370
  - rake_utils/code_statistics.rb
371
+ - ext/termdocs.c
367
372
  test_files: []
368
373
 
369
374
  rdoc_options:
@@ -1,138 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
- class QueryParserTest < Test::Unit::TestCase
4
-
5
- def test_strings()
6
- parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
7
- pairs = [
8
- ['', ''],
9
- ['word', 'word'],
10
- ['field:word', 'field:word'],
11
- ['"word1 word2 word3"', '"word word word"'],
12
- ['"word1 2342 word3"', '"word word"'],
13
- ['field:"one two three"', 'field:"one two three"'],
14
- ['field:"one 222 three"', 'field:"one three"'],
15
- ['field:"one <> three"', 'field:"one <> three"'],
16
- ['field:"one <> three <>"', 'field:"one <> three"'],
17
- ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
18
- ['field:"one <> <> <> three|four|five <>"', 'field:"one <> <> <> three|four|five"'],
19
- ['field:"one|two three|four|five six|seven"', 'field:"one|two three|four|five six|seven"'],
20
- ['contents:"testing|trucks"', 'contents:testing contents:trucks'],
21
- ['[aaa bbb]', '[aaa bbb]'],
22
- ['{aaa bbb]', '{aaa bbb]'],
23
- ['field:[aaa bbb}', 'field:[aaa bbb}'],
24
- ['{aaa bbb}', '{aaa bbb}'],
25
- ['{aaa>', '{aaa>'],
26
- ['[aaa>', '[aaa>'],
27
- ['field:<aaa}', 'field:<aaa}'],
28
- ['<aaa]', '<aaa]'],
29
- ['>aaa', '{aaa>'],
30
- ['>=aaa', '[aaa>'],
31
- ['<aaa', '<aaa}'],
32
- ['field:<=aaa', 'field:<aaa]'],
33
- ['REQ one REQ two', '+one +two'],
34
- ['REQ one two', '+one two'],
35
- ['one REQ two', 'one +two'],
36
- ['+one +two', '+one +two'],
37
- ['+one two', '+one two'],
38
- ['one +two', 'one +two'],
39
- ['-one -two', '-one -two'],
40
- ['-one two', '-one two'],
41
- ['one -two', 'one -two'],
42
- ['!one !two', '-one -two'],
43
- ['!one two', '-one two'],
44
- ['one !two', 'one -two'],
45
- ['NOT one NOT two', '-one -two'],
46
- ['NOT one two', '-one two'],
47
- ['one NOT two', 'one -two'],
48
- ['one two', 'one two'],
49
- ['one OR two', 'one two'],
50
- ['one AND two', '+one +two'],
51
- ['one two AND three', 'one two +three'],
52
- ['one two OR three', 'one two three'],
53
- ['one (two AND three)', 'one (+two +three)'],
54
- ['one AND (two OR three)', '+one +(two three)'],
55
- ['field:(one AND (two OR three))', '+field:one +(field:two field:three)'],
56
- ['one AND (two OR [aaa vvv})', '+one +(two [aaa vvv})'],
57
- ['one AND (one:two OR two:three) AND four', '+one +(one:two two:three) +four'],
58
- ['one^1.23', 'one^1.23'],
59
- ['(one AND two)^100.23', '(+one +two)^100.23'],
60
- ['field:(one AND two)^100.23', '(+field:one +field:two)^100.23'],
61
- ['field:(one AND [aaa bbb]^23.3)^100.23', '(+field:one +field:[aaa bbb]^23.3)^100.23'],
62
- ['(REQ field:"one two three")^23', 'field:"one two three"^23.0'],
63
- ['asdf~0.2', 'asdf~0.2'],
64
- ['field:asdf~0.2', 'field:asdf~0.2'],
65
- ['asdf~0.2^100.0', 'asdf~0.2^100.0'],
66
- ['field:asdf~0.2^0.1', 'field:asdf~0.2^0.1'],
67
- ['field:"asdf <> asdf|asdf"~4', 'field:"asdf <> asdf|asdf"~4'],
68
- ['"one two three four five"~5', '"one two three four five"~5'],
69
- ['ab?de', 'ab?de'],
70
- ['ab*de', 'ab*de'],
71
- ['asdf?*?asd*dsf?asfd*asdf?', 'asdf?*?asd*dsf?asfd*asdf?'],
72
- ['field:a* AND field:(b*)', '+field:a* +field:b*'],
73
- ['field:abc~ AND field:(b*)', '+field:abc~ +field:b*'],
74
- ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
75
-
76
- ['*:xxx', 'f1:xxx f2:xxx f3:xxx'],
77
- ['f1|f2:xxx', 'f1:xxx f2:xxx'],
78
-
79
- ['*:asd~0.2', 'f1:asd~0.2 f2:asd~0.2 f3:asd~0.2'],
80
- ['f1|f2:asd~0.2', 'f1:asd~0.2 f2:asd~0.2'],
81
-
82
- ['*:a?d*^20.0', '(f1:a?d* f2:a?d* f3:a?d*)^20.0'],
83
- ['f1|f2:a?d*^20.0', '(f1:a?d* f2:a?d*)^20.0'],
84
-
85
- ['*:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy" f3:"asdf <> xxx|yyy"'],
86
- ['f1|f2:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
87
-
88
- ['*:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx] f3:[bbb xxx]'],
89
- ['f1|f2:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx]'],
90
-
91
- ['*:(xxx AND bbb)', '+(f1:xxx f2:xxx f3:xxx) +(f1:bbb f2:bbb f3:bbb)'],
92
- ['f1|f2:(xxx AND bbb)', '+(f1:xxx f2:xxx) +(f1:bbb f2:bbb)'],
93
- ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
94
- ['"onewordphrase"', 'onewordphrase']
95
- ]
96
-
97
- pairs.each do |query_str, expected|
98
- assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
99
- end
100
- end
101
-
102
- def test_qp_with_standard_analyzer()
103
- parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"],
104
- :analyzer => Ferret::Analysis::StandardAnalyzer.new)
105
- pairs = [
106
- ['key:1234', 'key:1234'],
107
- ['key:(1234)', 'key:1234']
108
- ]
109
-
110
- pairs.each do |query_str, expected|
111
- assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
112
- end
113
- end
114
-
115
- def do_test_query_parse_exception_raised(str)
116
- parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
117
- assert_raise(Ferret::QueryParser::QueryParseException) do
118
- parser.parse(str)
119
- end
120
- end
121
-
122
-
123
- def test_bad_queries
124
- parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2"],
125
- :handle_parse_errors => true)
126
-
127
- pairs = [
128
- ['::*word', 'word'],
129
- ['()*&)(*^&*(', ''],
130
- ['()*&one)(*two(*&"', '"one two"']
131
- ]
132
-
133
- pairs.each do |query_str, expected|
134
- do_test_query_parse_exception_raised(query_str)
135
- assert_equal(expected, parser.parse(query_str).to_s(parser.default_field))
136
- end
137
- end
138
- end