ferret 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +1 -1
- data/TODO +4 -1
- data/TUTORIAL +9 -1
- data/ext/ferret.c +28 -3
- data/ext/ferret.h +24 -24
- data/ext/index_io.c +13 -28
- data/ext/ram_directory.c +11 -11
- data/ext/segment_merge_queue.c +2 -2
- data/ext/string_helper.c +1 -1
- data/ext/term.c +19 -13
- data/ext/term_buffer.c +3 -3
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +8 -0
- data/lib/ferret/analysis/tokenizers.rb +6 -6
- data/lib/ferret/index/index.rb +120 -2
- data/lib/ferret/index/index_writer.rb +7 -4
- data/lib/ferret/index/multi_reader.rb +1 -1
- data/lib/ferret/index/segment_reader.rb +1 -1
- data/lib/ferret/query_parser.rb +25 -17
- data/lib/ferret/query_parser/query_parser.tab.rb +292 -208
- data/lib/ferret/search/range_query.rb +2 -2
- data/test/test_helper.rb +7 -0
- data/test/unit/index/tc_index.rb +10 -5
- data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +4 -1
- data/test/unit/query_parser/tc_query_parser.rb +43 -15
- data/test/unit/store/tm_store.rb +2 -2
- data/test/unit/ts_analysis.rb +1 -15
- data/test/unit/ts_document.rb +2 -4
- data/test/unit/ts_index.rb +2 -18
- data/test/unit/ts_query_parser.rb +2 -3
- data/test/unit/ts_search.rb +2 -10
- data/test/unit/ts_store.rb +1 -5
- data/test/unit/ts_utils.rb +1 -9
- metadata +2 -2
data/lib/ferret/search/range_query.rb
CHANGED
@@ -96,7 +96,7 @@ module Ferret::Search
|
|
96
96
|
buffer << (@include_lower ? "[" : "{")
|
97
97
|
buffer << @lower_term
|
98
98
|
else
|
99
|
-
buffer << "
|
99
|
+
buffer << "<"
|
100
100
|
end
|
101
101
|
|
102
102
|
buffer << " " if @upper_term and @lower_term
|
@@ -105,7 +105,7 @@ module Ferret::Search
|
|
105
105
|
buffer << @upper_term
|
106
106
|
buffer << (@include_upper ? "]" : "}")
|
107
107
|
else
|
108
|
-
buffer << "
|
108
|
+
buffer << ">"
|
109
109
|
end
|
110
110
|
|
111
111
|
if boost() != 1.0
|
data/test/test_helper.rb
CHANGED
@@ -4,3 +4,10 @@ $:.unshift File.join(File.dirname(__FILE__), '../ext')
|
|
4
4
|
require 'test/unit'
|
5
5
|
require 'ferret'
|
6
6
|
require 'test/unit/index/th_doc'
|
7
|
+
|
8
|
+
def load_test_dir(dir)
|
9
|
+
dir = File.join(File.dirname(__FILE__), dir)
|
10
|
+
Dir.foreach(dir) do |file|
|
11
|
+
require File.join(dir, file) if file =~ /^t[mcs]_.*\.rb$/
|
12
|
+
end
|
13
|
+
end
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -72,14 +72,14 @@ class IndexTest < Test::Unit::TestCase
|
|
72
72
|
|
73
73
|
def do_test_index_with_doc_array(index)
|
74
74
|
data = [
|
75
|
-
{"def_field" => "one two", :id => "me"},
|
76
|
-
{"def_field" => "one", :field2 => "three"},
|
75
|
+
{"def_field" => "one two multi", :id => "me"},
|
76
|
+
{"def_field" => "one", :field2 => "three multi"},
|
77
77
|
{"def_field" => "two"},
|
78
78
|
{"def_field" => "one", :field2 => "four"},
|
79
79
|
{"def_field" => "one two"},
|
80
80
|
{"def_field" => "two", :field2 => "three", "field3" => "four"},
|
81
|
-
{"def_field" => "one"},
|
82
|
-
{"def_field" => "two", :field2 => "three", "field3" => "five"}
|
81
|
+
{"def_field" => "one multi2"},
|
82
|
+
{"def_field" => "two", :field2 => "three multi2", "field3" => "five multi"}
|
83
83
|
]
|
84
84
|
data.each {|doc| index << doc }
|
85
85
|
q = "one AND two"
|
@@ -90,6 +90,10 @@ class IndexTest < Test::Unit::TestCase
|
|
90
90
|
check_results(index, q, [0, 1, 3, 4, 6, 7])
|
91
91
|
q = "two AND (field3:f*)"
|
92
92
|
check_results(index, q, [5, 7])
|
93
|
+
q = "*:(multi OR multi2)"
|
94
|
+
check_results(index, q, [0, 1, 6, 7])
|
95
|
+
q = "field2|field3:(multi OR multi2)"
|
96
|
+
check_results(index, q, [1, 7])
|
93
97
|
doc = index[5]
|
94
98
|
assert_equal("three", index[5]["field2"])
|
95
99
|
assert(!index.has_deletions?)
|
@@ -99,6 +103,7 @@ class IndexTest < Test::Unit::TestCase
|
|
99
103
|
assert(index.has_deletions?)
|
100
104
|
assert(index.deleted?(5))
|
101
105
|
assert_equal(7, index.size)
|
106
|
+
q = "two AND (field3:f*)"
|
102
107
|
check_results(index, q, [7])
|
103
108
|
doc["field2"] = "dave"
|
104
109
|
index << doc
|
@@ -114,7 +119,7 @@ class IndexTest < Test::Unit::TestCase
|
|
114
119
|
assert(! index.deleted?(7))
|
115
120
|
t = Term.new("field2", "four")
|
116
121
|
assert_equal("one", index[t]["def_field"])
|
117
|
-
assert_equal("one two", index["me"]["def_field"])
|
122
|
+
assert_equal("one two multi", index["me"]["def_field"])
|
118
123
|
index.delete("me")
|
119
124
|
assert(index.deleted?(0))
|
120
125
|
end
|
data/test/unit/index/tc_multiple_term_doc_pos_enum.rb
CHANGED
@@ -1,12 +1,15 @@
|
|
1
1
|
require File.dirname(__FILE__) + "/../../test_helper"
|
2
2
|
|
3
3
|
class MultipleTermDocPosEnumTest < Test::Unit::TestCase
|
4
|
+
include Ferret::Index
|
4
5
|
include Ferret::Search
|
5
6
|
include Ferret::Analysis
|
6
7
|
|
7
8
|
def setup()
|
8
9
|
@dir = Ferret::Store::RAMDirectory.new()
|
9
|
-
iw = IndexWriter.new(@dir,
|
10
|
+
iw = IndexWriter.new(@dir,
|
11
|
+
:analyzer => WhiteSpaceAnalyzer.new(),
|
12
|
+
:create_if_missing => true)
|
10
13
|
@documents = IndexTestHelper.prepare_search_docs()
|
11
14
|
@documents.each { |doc| iw << doc }
|
12
15
|
iw.close()
|
data/test/unit/query_parser/tc_query_parser.rb
CHANGED
@@ -2,12 +2,8 @@ require File.dirname(__FILE__) + "/../../test_helper"
|
|
2
2
|
|
3
3
|
class QueryParserTest < Test::Unit::TestCase
|
4
4
|
|
5
|
-
|
6
|
-
def setup()
|
7
|
-
@parser = Ferret::QueryParser.new("xxx")
|
8
|
-
end
|
9
|
-
|
10
5
|
def test_strings()
|
6
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
|
11
7
|
pairs = [
|
12
8
|
['word', 'word'],
|
13
9
|
['field:word', 'field:word'],
|
@@ -24,14 +20,14 @@ class QueryParserTest < Test::Unit::TestCase
|
|
24
20
|
['{aaa bbb]', '{aaa bbb]'],
|
25
21
|
['field:[aaa bbb}', 'field:[aaa bbb}'],
|
26
22
|
['{aaa bbb}', '{aaa bbb}'],
|
27
|
-
['{aaa
|
28
|
-
['[aaa
|
29
|
-
['field
|
30
|
-
['
|
31
|
-
['>aaa', '{aaa
|
32
|
-
['>=aaa', '[aaa
|
33
|
-
['<aaa', '
|
34
|
-
['field:<=aaa', 'field
|
23
|
+
['{aaa>', '{aaa>'],
|
24
|
+
['[aaa>', '[aaa>'],
|
25
|
+
['field:<aaa}', 'field:<aaa}'],
|
26
|
+
['<aaa]', '<aaa]'],
|
27
|
+
['>aaa', '{aaa>'],
|
28
|
+
['>=aaa', '[aaa>'],
|
29
|
+
['<aaa', '<aaa}'],
|
30
|
+
['field:<=aaa', 'field:<aaa]'],
|
35
31
|
['REQ one REQ two', '+one +two'],
|
36
32
|
['REQ one two', '+one two'],
|
37
33
|
['one REQ two', 'one +two'],
|
@@ -73,12 +69,44 @@ class QueryParserTest < Test::Unit::TestCase
|
|
73
69
|
['asdf?*?asd*dsf?asfd*asdf?', 'asdf?*?asd*dsf?asfd*asdf?'],
|
74
70
|
['field:a* AND field:(b*)', '+field:a* +field:b*'],
|
75
71
|
['field:abc~ AND field:(b*)', '+field:abc~0.5 +field:b*'],
|
76
|
-
['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0']
|
72
|
+
['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
|
73
|
+
|
74
|
+
['*:xxx', 'f1:xxx f2:xxx f3:xxx'],
|
75
|
+
['f1|f2:xxx', 'f1:xxx f2:xxx'],
|
76
|
+
|
77
|
+
['*:asd~0.2', 'f1:asd~0.2 f2:asd~0.2 f3:asd~0.2'],
|
78
|
+
['f1|f2:asd~0.2', 'f1:asd~0.2 f2:asd~0.2'],
|
79
|
+
|
80
|
+
['*:a?d*^20.0', '(f1:a?d* f2:a?d* f3:a?d*)^20.0'],
|
81
|
+
['f1|f2:a?d*^20.0', '(f1:a?d* f2:a?d*)^20.0'],
|
82
|
+
|
83
|
+
['*:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy" f3:"asdf <> xxx|yyy"'],
|
84
|
+
['f1|f2:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
|
85
|
+
|
86
|
+
['*:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx] f3:[bbb xxx]'],
|
87
|
+
['f1|f2:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx]'],
|
88
|
+
|
89
|
+
['*:(xxx AND bbb)', '+(f1:xxx f2:xxx f3:xxx) +(f1:bbb f2:bbb f3:bbb)'],
|
90
|
+
['f1|f2:(xxx AND bbb)', '+(f1:xxx f2:xxx) +(f1:bbb f2:bbb)'],
|
91
|
+
['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
|
92
|
+
['"onewordphrase"', 'onewordphrase']
|
77
93
|
]
|
94
|
+
|
95
|
+
pairs.each do |pair|
|
96
|
+
assert_equal(pair[1], parser.parse(pair[0]).to_s(parser.default_field))
|
97
|
+
end
|
98
|
+
end
|
78
99
|
|
100
|
+
def test_qp_with_standard_analyzer()
|
101
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"],
|
102
|
+
:analyzer => Ferret::Analysis::StandardAnalyzer.new)
|
103
|
+
pairs = [
|
104
|
+
['key:1234', 'key:1234'],
|
105
|
+
['key:(1234)', 'key:1234']
|
106
|
+
]
|
79
107
|
|
80
108
|
pairs.each do |pair|
|
81
|
-
assert_equal(pair[1],
|
109
|
+
assert_equal(pair[1], parser.parse(pair[0]).to_s(parser.default_field))
|
82
110
|
end
|
83
111
|
end
|
84
112
|
end
|
data/test/unit/store/tm_store.rb
CHANGED
@@ -95,13 +95,13 @@ module StoreTest
|
|
95
95
|
end
|
96
96
|
|
97
97
|
def test_rw_utf8_strings
|
98
|
-
text = '
|
98
|
+
text = '³³ ÄÄÄÄÄÄ 道德經'
|
99
99
|
ostream = @dir.create_output("rw_utf8_strings.test")
|
100
100
|
ostream.write_string(text)
|
101
101
|
ostream.write_string(text*100)
|
102
102
|
ostream.close
|
103
103
|
istream = @dir.open_input("rw_utf8_strings.test")
|
104
|
-
assert_equal(text, istream.read_string, "Short string test failed")
|
104
|
+
assert_equal(text, x = istream.read_string, "Short string test failed")
|
105
105
|
assert_equal(text*100, istream.read_string, "Short string test failed")
|
106
106
|
istream.close
|
107
107
|
end
|
data/test/unit/ts_analysis.rb
CHANGED
@@ -1,16 +1,2 @@
|
|
1
|
-
$:.unshift File.dirname(__FILE__)
|
2
1
|
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
3
|
-
|
4
|
-
require 'analysis/tc_letter_tokenizer'
|
5
|
-
require 'analysis/tc_white_space_tokenizer'
|
6
|
-
require 'analysis/tc_lower_case_tokenizer'
|
7
|
-
require 'analysis/tc_word_list_loader'
|
8
|
-
require 'analysis/tc_lower_case_filter'
|
9
|
-
require 'analysis/tc_stop_filter'
|
10
|
-
require 'analysis/tc_porter_stem_filter'
|
11
|
-
require 'analysis/tc_analyzer'
|
12
|
-
require 'analysis/tc_stop_analyzer'
|
13
|
-
require 'analysis/tc_white_space_analyzer'
|
14
|
-
require 'analysis/tc_per_field_analyzer_wrapper'
|
15
|
-
require 'analysis/tc_standard_tokenizer'
|
16
|
-
require 'analysis/tc_standard_analyzer'
|
2
|
+
load_test_dir('unit/analysis')
|
data/test/unit/ts_document.rb
CHANGED
data/test/unit/ts_index.rb
CHANGED
@@ -1,18 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'index/tc_compound_file_io.rb'
|
4
|
-
require 'index/tc_field_infos.rb'
|
5
|
-
require 'index/tc_fields_io.rb'
|
6
|
-
require 'index/tc_index_writer.rb'
|
7
|
-
require 'index/tc_index_reader.rb'
|
8
|
-
require 'index/tc_segment_infos.rb'
|
9
|
-
require 'index/tc_segment_term_docs.rb'
|
10
|
-
require 'index/tc_segment_term_enum.rb'
|
11
|
-
require 'index/tc_segment_term_vector.rb'
|
12
|
-
require 'index/tc_term.rb'
|
13
|
-
require 'index/tc_term_buffer.rb'
|
14
|
-
require 'index/tc_term_info.rb'
|
15
|
-
require 'index/tc_term_infos_io.rb'
|
16
|
-
require 'index/tc_term_vector_offset_info.rb'
|
17
|
-
require 'index/tc_term_vectors_io.rb'
|
18
|
-
require 'index/tc_index.rb'
|
1
|
+
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
2
|
+
load_test_dir('unit/index')
|
data/test/unit/ts_query_parser.rb
CHANGED
@@ -1,3 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'query_parser/tc_query_parser.rb'
|
1
|
+
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
2
|
+
load_test_dir('unit/query_parser')
|
data/test/unit/ts_search.rb
CHANGED
@@ -1,10 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'search/tc_similarity.rb'
|
4
|
-
require 'search/tc_index_searcher.rb'
|
5
|
-
require 'search/tc_fuzzy_query.rb'
|
6
|
-
require 'search/tc_sort_field.rb'
|
7
|
-
require 'search/tc_sort.rb'
|
8
|
-
require 'search/tc_search_and_sort.rb'
|
9
|
-
require 'search/tc_filter.rb'
|
10
|
-
require 'search/tc_spans.rb'
|
1
|
+
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
2
|
+
load_test_dir('unit/search')
|
data/test/unit/ts_store.rb
CHANGED
data/test/unit/ts_utils.rb
CHANGED
@@ -1,10 +1,2 @@
|
|
1
|
-
$:.unshift File.dirname(__FILE__)
|
2
1
|
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
3
|
-
|
4
|
-
require 'utils/tc_string_helper'
|
5
|
-
require 'utils/tc_priority_queue'
|
6
|
-
require 'utils/tc_bit_vector'
|
7
|
-
require 'utils/tc_date_tools.rb'
|
8
|
-
require 'utils/tc_number_tools.rb'
|
9
|
-
require 'utils/tc_parameter.rb'
|
10
|
-
require 'utils/tc_weak_key_hash.rb'
|
2
|
+
load_test_dir('unit/utils')
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.1
|
7
|
-
date: 2005-10-
|
6
|
+
version: 0.1.2
|
7
|
+
date: 2005-10-25 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|