ferret 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +1 -1
- data/TODO +4 -1
- data/TUTORIAL +9 -1
- data/ext/ferret.c +28 -3
- data/ext/ferret.h +24 -24
- data/ext/index_io.c +13 -28
- data/ext/ram_directory.c +11 -11
- data/ext/segment_merge_queue.c +2 -2
- data/ext/string_helper.c +1 -1
- data/ext/term.c +19 -13
- data/ext/term_buffer.c +3 -3
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +8 -0
- data/lib/ferret/analysis/tokenizers.rb +6 -6
- data/lib/ferret/index/index.rb +120 -2
- data/lib/ferret/index/index_writer.rb +7 -4
- data/lib/ferret/index/multi_reader.rb +1 -1
- data/lib/ferret/index/segment_reader.rb +1 -1
- data/lib/ferret/query_parser.rb +25 -17
- data/lib/ferret/query_parser/query_parser.tab.rb +292 -208
- data/lib/ferret/search/range_query.rb +2 -2
- data/test/test_helper.rb +7 -0
- data/test/unit/index/tc_index.rb +10 -5
- data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +4 -1
- data/test/unit/query_parser/tc_query_parser.rb +43 -15
- data/test/unit/store/tm_store.rb +2 -2
- data/test/unit/ts_analysis.rb +1 -15
- data/test/unit/ts_document.rb +2 -4
- data/test/unit/ts_index.rb +2 -18
- data/test/unit/ts_query_parser.rb +2 -3
- data/test/unit/ts_search.rb +2 -10
- data/test/unit/ts_store.rb +1 -5
- data/test/unit/ts_utils.rb +1 -9
- metadata +2 -2
@@ -96,7 +96,7 @@ module Ferret::Search
|
|
96
96
|
buffer << (@include_lower ? "[" : "{")
|
97
97
|
buffer << @lower_term
|
98
98
|
else
|
99
|
-
buffer << "
|
99
|
+
buffer << "<"
|
100
100
|
end
|
101
101
|
|
102
102
|
buffer << " " if @upper_term and @lower_term
|
@@ -105,7 +105,7 @@ module Ferret::Search
|
|
105
105
|
buffer << @upper_term
|
106
106
|
buffer << (@include_upper ? "]" : "}")
|
107
107
|
else
|
108
|
-
buffer << "
|
108
|
+
buffer << ">"
|
109
109
|
end
|
110
110
|
|
111
111
|
if boost() != 1.0
|
data/test/test_helper.rb
CHANGED
@@ -4,3 +4,10 @@ $:.unshift File.join(File.dirname(__FILE__), '../ext')
|
|
4
4
|
require 'test/unit'
|
5
5
|
require 'ferret'
|
6
6
|
require 'test/unit/index/th_doc'
|
7
|
+
|
8
|
+
def load_test_dir(dir)
|
9
|
+
dir = File.join(File.dirname(__FILE__), dir)
|
10
|
+
Dir.foreach(dir) do |file|
|
11
|
+
require File.join(dir, file) if file =~ /^t[mcs]_.*\.rb$/
|
12
|
+
end
|
13
|
+
end
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -72,14 +72,14 @@ class IndexTest < Test::Unit::TestCase
|
|
72
72
|
|
73
73
|
def do_test_index_with_doc_array(index)
|
74
74
|
data = [
|
75
|
-
{"def_field" => "one two", :id => "me"},
|
76
|
-
{"def_field" => "one", :field2 => "three"},
|
75
|
+
{"def_field" => "one two multi", :id => "me"},
|
76
|
+
{"def_field" => "one", :field2 => "three multi"},
|
77
77
|
{"def_field" => "two"},
|
78
78
|
{"def_field" => "one", :field2 => "four"},
|
79
79
|
{"def_field" => "one two"},
|
80
80
|
{"def_field" => "two", :field2 => "three", "field3" => "four"},
|
81
|
-
{"def_field" => "one"},
|
82
|
-
{"def_field" => "two", :field2 => "three", "field3" => "five"}
|
81
|
+
{"def_field" => "one multi2"},
|
82
|
+
{"def_field" => "two", :field2 => "three multi2", "field3" => "five multi"}
|
83
83
|
]
|
84
84
|
data.each {|doc| index << doc }
|
85
85
|
q = "one AND two"
|
@@ -90,6 +90,10 @@ class IndexTest < Test::Unit::TestCase
|
|
90
90
|
check_results(index, q, [0, 1, 3, 4, 6, 7])
|
91
91
|
q = "two AND (field3:f*)"
|
92
92
|
check_results(index, q, [5, 7])
|
93
|
+
q = "*:(multi OR multi2)"
|
94
|
+
check_results(index, q, [0, 1, 6, 7])
|
95
|
+
q = "field2|field3:(multi OR multi2)"
|
96
|
+
check_results(index, q, [1, 7])
|
93
97
|
doc = index[5]
|
94
98
|
assert_equal("three", index[5]["field2"])
|
95
99
|
assert(!index.has_deletions?)
|
@@ -99,6 +103,7 @@ class IndexTest < Test::Unit::TestCase
|
|
99
103
|
assert(index.has_deletions?)
|
100
104
|
assert(index.deleted?(5))
|
101
105
|
assert_equal(7, index.size)
|
106
|
+
q = "two AND (field3:f*)"
|
102
107
|
check_results(index, q, [7])
|
103
108
|
doc["field2"] = "dave"
|
104
109
|
index << doc
|
@@ -114,7 +119,7 @@ class IndexTest < Test::Unit::TestCase
|
|
114
119
|
assert(! index.deleted?(7))
|
115
120
|
t = Term.new("field2", "four")
|
116
121
|
assert_equal("one", index[t]["def_field"])
|
117
|
-
assert_equal("one two", index["me"]["def_field"])
|
122
|
+
assert_equal("one two multi", index["me"]["def_field"])
|
118
123
|
index.delete("me")
|
119
124
|
assert(index.deleted?(0))
|
120
125
|
end
|
@@ -1,12 +1,15 @@
|
|
1
1
|
require File.dirname(__FILE__) + "/../../test_helper"
|
2
2
|
|
3
3
|
class MultipleTermDocPosEnumTest < Test::Unit::TestCase
|
4
|
+
include Ferret::Index
|
4
5
|
include Ferret::Search
|
5
6
|
include Ferret::Analysis
|
6
7
|
|
7
8
|
def setup()
|
8
9
|
@dir = Ferret::Store::RAMDirectory.new()
|
9
|
-
iw = IndexWriter.new(@dir,
|
10
|
+
iw = IndexWriter.new(@dir,
|
11
|
+
:analyzer => WhiteSpaceAnalyzer.new(),
|
12
|
+
:create_if_missing => true)
|
10
13
|
@documents = IndexTestHelper.prepare_search_docs()
|
11
14
|
@documents.each { |doc| iw << doc }
|
12
15
|
iw.close()
|
@@ -2,12 +2,8 @@ require File.dirname(__FILE__) + "/../../test_helper"
|
|
2
2
|
|
3
3
|
class QueryParserTest < Test::Unit::TestCase
|
4
4
|
|
5
|
-
|
6
|
-
def setup()
|
7
|
-
@parser = Ferret::QueryParser.new("xxx")
|
8
|
-
end
|
9
|
-
|
10
5
|
def test_strings()
|
6
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
|
11
7
|
pairs = [
|
12
8
|
['word', 'word'],
|
13
9
|
['field:word', 'field:word'],
|
@@ -24,14 +20,14 @@ class QueryParserTest < Test::Unit::TestCase
|
|
24
20
|
['{aaa bbb]', '{aaa bbb]'],
|
25
21
|
['field:[aaa bbb}', 'field:[aaa bbb}'],
|
26
22
|
['{aaa bbb}', '{aaa bbb}'],
|
27
|
-
['{aaa
|
28
|
-
['[aaa
|
29
|
-
['field
|
30
|
-
['
|
31
|
-
['>aaa', '{aaa
|
32
|
-
['>=aaa', '[aaa
|
33
|
-
['<aaa', '
|
34
|
-
['field:<=aaa', 'field
|
23
|
+
['{aaa>', '{aaa>'],
|
24
|
+
['[aaa>', '[aaa>'],
|
25
|
+
['field:<aaa}', 'field:<aaa}'],
|
26
|
+
['<aaa]', '<aaa]'],
|
27
|
+
['>aaa', '{aaa>'],
|
28
|
+
['>=aaa', '[aaa>'],
|
29
|
+
['<aaa', '<aaa}'],
|
30
|
+
['field:<=aaa', 'field:<aaa]'],
|
35
31
|
['REQ one REQ two', '+one +two'],
|
36
32
|
['REQ one two', '+one two'],
|
37
33
|
['one REQ two', 'one +two'],
|
@@ -73,12 +69,44 @@ class QueryParserTest < Test::Unit::TestCase
|
|
73
69
|
['asdf?*?asd*dsf?asfd*asdf?', 'asdf?*?asd*dsf?asfd*asdf?'],
|
74
70
|
['field:a* AND field:(b*)', '+field:a* +field:b*'],
|
75
71
|
['field:abc~ AND field:(b*)', '+field:abc~0.5 +field:b*'],
|
76
|
-
['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0']
|
72
|
+
['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
|
73
|
+
|
74
|
+
['*:xxx', 'f1:xxx f2:xxx f3:xxx'],
|
75
|
+
['f1|f2:xxx', 'f1:xxx f2:xxx'],
|
76
|
+
|
77
|
+
['*:asd~0.2', 'f1:asd~0.2 f2:asd~0.2 f3:asd~0.2'],
|
78
|
+
['f1|f2:asd~0.2', 'f1:asd~0.2 f2:asd~0.2'],
|
79
|
+
|
80
|
+
['*:a?d*^20.0', '(f1:a?d* f2:a?d* f3:a?d*)^20.0'],
|
81
|
+
['f1|f2:a?d*^20.0', '(f1:a?d* f2:a?d*)^20.0'],
|
82
|
+
|
83
|
+
['*:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy" f3:"asdf <> xxx|yyy"'],
|
84
|
+
['f1|f2:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
|
85
|
+
|
86
|
+
['*:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx] f3:[bbb xxx]'],
|
87
|
+
['f1|f2:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx]'],
|
88
|
+
|
89
|
+
['*:(xxx AND bbb)', '+(f1:xxx f2:xxx f3:xxx) +(f1:bbb f2:bbb f3:bbb)'],
|
90
|
+
['f1|f2:(xxx AND bbb)', '+(f1:xxx f2:xxx) +(f1:bbb f2:bbb)'],
|
91
|
+
['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
|
92
|
+
['"onewordphrase"', 'onewordphrase']
|
77
93
|
]
|
94
|
+
|
95
|
+
pairs.each do |pair|
|
96
|
+
assert_equal(pair[1], parser.parse(pair[0]).to_s(parser.default_field))
|
97
|
+
end
|
98
|
+
end
|
78
99
|
|
100
|
+
def test_qp_with_standard_analyzer()
|
101
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"],
|
102
|
+
:analyzer => Ferret::Analysis::StandardAnalyzer.new)
|
103
|
+
pairs = [
|
104
|
+
['key:1234', 'key:1234'],
|
105
|
+
['key:(1234)', 'key:1234']
|
106
|
+
]
|
79
107
|
|
80
108
|
pairs.each do |pair|
|
81
|
-
assert_equal(pair[1],
|
109
|
+
assert_equal(pair[1], parser.parse(pair[0]).to_s(parser.default_field))
|
82
110
|
end
|
83
111
|
end
|
84
112
|
end
|
data/test/unit/store/tm_store.rb
CHANGED
@@ -95,13 +95,13 @@ module StoreTest
|
|
95
95
|
end
|
96
96
|
|
97
97
|
def test_rw_utf8_strings
|
98
|
-
text = '
|
98
|
+
text = '³³ ÄÄÄÄÄÄ 道德經'
|
99
99
|
ostream = @dir.create_output("rw_utf8_strings.test")
|
100
100
|
ostream.write_string(text)
|
101
101
|
ostream.write_string(text*100)
|
102
102
|
ostream.close
|
103
103
|
istream = @dir.open_input("rw_utf8_strings.test")
|
104
|
-
assert_equal(text, istream.read_string, "Short string test failed")
|
104
|
+
assert_equal(text, x = istream.read_string, "Short string test failed")
|
105
105
|
assert_equal(text*100, istream.read_string, "Short string test failed")
|
106
106
|
istream.close
|
107
107
|
end
|
data/test/unit/ts_analysis.rb
CHANGED
@@ -1,16 +1,2 @@
|
|
1
|
-
$:.unshift File.dirname(__FILE__)
|
2
1
|
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
3
|
-
|
4
|
-
require 'analysis/tc_letter_tokenizer'
|
5
|
-
require 'analysis/tc_white_space_tokenizer'
|
6
|
-
require 'analysis/tc_lower_case_tokenizer'
|
7
|
-
require 'analysis/tc_word_list_loader'
|
8
|
-
require 'analysis/tc_lower_case_filter'
|
9
|
-
require 'analysis/tc_stop_filter'
|
10
|
-
require 'analysis/tc_porter_stem_filter'
|
11
|
-
require 'analysis/tc_analyzer'
|
12
|
-
require 'analysis/tc_stop_analyzer'
|
13
|
-
require 'analysis/tc_white_space_analyzer'
|
14
|
-
require 'analysis/tc_per_field_analyzer_wrapper'
|
15
|
-
require 'analysis/tc_standard_tokenizer'
|
16
|
-
require 'analysis/tc_standard_analyzer'
|
2
|
+
load_test_dir('unit/analysis')
|
data/test/unit/ts_document.rb
CHANGED
data/test/unit/ts_index.rb
CHANGED
@@ -1,18 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'index/tc_compound_file_io.rb'
|
4
|
-
require 'index/tc_field_infos.rb'
|
5
|
-
require 'index/tc_fields_io.rb'
|
6
|
-
require 'index/tc_index_writer.rb'
|
7
|
-
require 'index/tc_index_reader.rb'
|
8
|
-
require 'index/tc_segment_infos.rb'
|
9
|
-
require 'index/tc_segment_term_docs.rb'
|
10
|
-
require 'index/tc_segment_term_enum.rb'
|
11
|
-
require 'index/tc_segment_term_vector.rb'
|
12
|
-
require 'index/tc_term.rb'
|
13
|
-
require 'index/tc_term_buffer.rb'
|
14
|
-
require 'index/tc_term_info.rb'
|
15
|
-
require 'index/tc_term_infos_io.rb'
|
16
|
-
require 'index/tc_term_vector_offset_info.rb'
|
17
|
-
require 'index/tc_term_vectors_io.rb'
|
18
|
-
require 'index/tc_index.rb'
|
1
|
+
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
2
|
+
load_test_dir('unit/index')
|
@@ -1,3 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'query_parser/tc_query_parser.rb'
|
1
|
+
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
2
|
+
load_test_dir('unit/query_parser')
|
data/test/unit/ts_search.rb
CHANGED
@@ -1,10 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'search/tc_similarity.rb'
|
4
|
-
require 'search/tc_index_searcher.rb'
|
5
|
-
require 'search/tc_fuzzy_query.rb'
|
6
|
-
require 'search/tc_sort_field.rb'
|
7
|
-
require 'search/tc_sort.rb'
|
8
|
-
require 'search/tc_search_and_sort.rb'
|
9
|
-
require 'search/tc_filter.rb'
|
10
|
-
require 'search/tc_spans.rb'
|
1
|
+
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
2
|
+
load_test_dir('unit/search')
|
data/test/unit/ts_store.rb
CHANGED
data/test/unit/ts_utils.rb
CHANGED
@@ -1,10 +1,2 @@
|
|
1
|
-
$:.unshift File.dirname(__FILE__)
|
2
1
|
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
3
|
-
|
4
|
-
require 'utils/tc_string_helper'
|
5
|
-
require 'utils/tc_priority_queue'
|
6
|
-
require 'utils/tc_bit_vector'
|
7
|
-
require 'utils/tc_date_tools.rb'
|
8
|
-
require 'utils/tc_number_tools.rb'
|
9
|
-
require 'utils/tc_parameter.rb'
|
10
|
-
require 'utils/tc_weak_key_hash.rb'
|
2
|
+
load_test_dir('unit/utils')
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.1
|
7
|
-
date: 2005-10-
|
6
|
+
version: 0.1.2
|
7
|
+
date: 2005-10-25 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|