ferret 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +10 -3
- data/Rakefile +2 -1
- data/TUTORIAL +3 -3
- data/lib/ferret.rb +1 -1
- data/lib/ferret/index/index.rb +1 -0
- data/lib/ferret/index/index_writer.rb +3 -2
- data/lib/ferret/query_parser.rb +2 -2
- data/test/unit/analysis/data/wordfile +6 -0
- data/test/unit/index/tc_index.rb +27 -2
- metadata +4 -3
data/README
CHANGED
@@ -87,10 +87,17 @@ only run on linux.
|
|
87
87
|
|
88
88
|
== Contact
|
89
89
|
|
90
|
-
|
91
|
-
the mailing list. More information on the list can be found at:
|
90
|
+
For bug reports and patches I have set up Trac here;
|
92
91
|
|
93
|
-
http://ferret.davebalmain.com/
|
92
|
+
http://ferret.davebalmain.com/trac
|
93
|
+
|
94
|
+
Queries, discussion, etc. should be addressed to the forum or mailing lists hosted
|
95
|
+
at;
|
96
|
+
|
97
|
+
http://rubyforge.org/projects/ferret/
|
98
|
+
|
99
|
+
Alternatively you could create a new page for discussion on the wiki at my Trac
|
100
|
+
page above. Or, if you're shy, please feel free to email me directly at dbalmain@gmail.com
|
94
101
|
|
95
102
|
Of course, since Ferret is almost a straight port of Java Lucene,
|
96
103
|
everything said about Lucene at http://jakarta.apache.org/lucene/ should
|
data/Rakefile
CHANGED
@@ -115,6 +115,7 @@ PKG_FILES = FileList[
|
|
115
115
|
'ext/**/*',
|
116
116
|
'lib/**/*.rb',
|
117
117
|
'test/**/*.rb',
|
118
|
+
'test/**/wordfile',
|
118
119
|
'rake_utils/**/*.rb',
|
119
120
|
'Rakefile'
|
120
121
|
]
|
@@ -170,7 +171,7 @@ else
|
|
170
171
|
|
171
172
|
s.author = "David Balmain"
|
172
173
|
s.email = "dbalmain@gmail.com"
|
173
|
-
s.homepage = "http://ferret.davebalmain.com"
|
174
|
+
s.homepage = "http://ferret.davebalmain.com/trac"
|
174
175
|
s.rubyforge_project = "ferret"
|
175
176
|
# if ENV['CERT_DIR']
|
176
177
|
# s.signing_key = File.join(ENV['CERT_DIR'], 'gem-private_key.pem')
|
data/TUTORIAL
CHANGED
@@ -74,21 +74,21 @@ Index#search_each. The first method returns a Ferret::Index::TopDocs object.
|
|
74
74
|
The second we'll show here. Let's say we wanted to find all documents with the
|
75
75
|
phrase "quick brown fox" in the content field. We'd write;
|
76
76
|
|
77
|
-
index.
|
77
|
+
index.search_each('content:"quick brown fox"') do |doc, score|
|
78
78
|
puts "Document #{doc} found with a score of #{score}"
|
79
79
|
end
|
80
80
|
|
81
81
|
But "fast" has a pretty similar meaning to "quick" and we don't mind if the
|
82
82
|
fox is a little red. So we could expand our search like this;
|
83
83
|
|
84
|
-
index.
|
84
|
+
index.search_each('content:"quick|fast brown|red fox"') do |doc, score|
|
85
85
|
puts "Document #{doc} found with a score of #{score}"
|
86
86
|
end
|
87
87
|
|
88
88
|
What if we want to find all documents entered on or after 5th of September,
|
89
89
|
2005 with the words "ruby" or "rails" in them? We could type something like;
|
90
90
|
|
91
|
-
index.
|
91
|
+
index.search_each('date:( >= 20050905) content:(ruby OR rails)') do |doc, score|
|
92
92
|
puts "Document #{doc} found with a score of #{score}"
|
93
93
|
end
|
94
94
|
|
data/lib/ferret.rb
CHANGED
data/lib/ferret/index/index.rb
CHANGED
@@ -94,11 +94,12 @@ module Index
|
|
94
94
|
else
|
95
95
|
begin
|
96
96
|
@segment_infos.read(@directory)
|
97
|
-
rescue
|
97
|
+
rescue Exception => e
|
98
98
|
if options[:create_if_missing]
|
99
99
|
@segment_infos.write(@directory)
|
100
100
|
else
|
101
|
-
|
101
|
+
@write_lock.release() # release the write lock before re-raising
|
102
|
+
raise e
|
102
103
|
end
|
103
104
|
end
|
104
105
|
end
|
data/lib/ferret/query_parser.rb
CHANGED
@@ -106,12 +106,12 @@ module Ferret
|
|
106
106
|
#
|
107
107
|
# Some examples;
|
108
108
|
#
|
109
|
-
# '+sport:ski -sport:snowboard sport:
|
109
|
+
# '+sport:ski -sport:snowboard sport:toboggen'
|
110
110
|
# '+ingredient:chocolate +ingredient:strawberries -ingredient:wheat'
|
111
111
|
#
|
112
112
|
# You may also use the boolean operators "AND", "&&", "OR" and "||". eg;
|
113
113
|
#
|
114
|
-
# 'sport:ski AND NOT sport:snowboard OR sport:
|
114
|
+
# 'sport:ski AND NOT sport:snowboard OR sport:toboggen'
|
115
115
|
# 'ingredient:chocolate AND ingredient:strawberries AND NOT ingredient:wheat'
|
116
116
|
#
|
117
117
|
# You can set the default operator when you create the query parser.
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -130,11 +130,36 @@ class IndexTest < Test::Unit::TestCase
|
|
130
130
|
|
131
131
|
def test_fs_index
|
132
132
|
fs_path = File.join(File.dirname(__FILE__), '../../temp/fsdir')
|
133
|
-
|
133
|
+
`rm -rf #{fs_path}`
|
134
|
+
assert_raise(Errno::ENOENT) {Index.new(:path => fs_path, :create_if_missing => false, :default_field => "def_field")}
|
135
|
+
index = Index.new(:path => fs_path, :default_field => "def_field")
|
134
136
|
do_test_index_with_array(index)
|
137
|
+
`rm -rf #{fs_path}`
|
135
138
|
index = Index.new(:path => fs_path, :create => true, :default_field => "def_field")
|
136
139
|
do_test_index_with_hash(index)
|
137
|
-
index = Index.new(:create => true, :default_field => "def_field")
|
140
|
+
index = Index.new(:path => fs_path, :create => true, :default_field => "def_field")
|
138
141
|
do_test_index_with_doc_array(index)
|
139
142
|
end
|
143
|
+
|
144
|
+
def test_fs_index_is_persistant
|
145
|
+
fs_path = File.join(File.dirname(__FILE__), '../../temp/fsdir')
|
146
|
+
`rm -rf #{fs_path}`
|
147
|
+
data = [
|
148
|
+
{"def_field" => "one two", :id => "me"},
|
149
|
+
{"def_field" => "one", :field2 => "three"},
|
150
|
+
{"def_field" => "two"},
|
151
|
+
{"def_field" => "one", :field2 => "four"},
|
152
|
+
{"def_field" => "one two"},
|
153
|
+
{"def_field" => "two", :field2 => "three", "field3" => "four"},
|
154
|
+
{"def_field" => "one"},
|
155
|
+
{"def_field" => "two", :field2 => "three", "field3" => "five"}
|
156
|
+
]
|
157
|
+
index = Index.new(:path => fs_path, :default_field => "def_field")
|
158
|
+
data.each {|doc| index << doc }
|
159
|
+
assert_equal(8, index.size)
|
160
|
+
index.close
|
161
|
+
index = Index.new(:path => fs_path, :default_field => "def_field")
|
162
|
+
assert_equal(8, index.size)
|
163
|
+
assert_equal("four", index[5]["field3"])
|
164
|
+
end
|
140
165
|
end
|
metadata
CHANGED
@@ -3,13 +3,13 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.
|
7
|
-
date: 2005-10-
|
6
|
+
version: 0.1.1
|
7
|
+
date: 2005-10-22 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
11
11
|
email: dbalmain@gmail.com
|
12
|
-
homepage: http://ferret.davebalmain.com
|
12
|
+
homepage: http://ferret.davebalmain.com/trac
|
13
13
|
rubyforge_project: ferret
|
14
14
|
description: Ferret is a port of the Java Lucene project. It is a powerful indexing and search library.
|
15
15
|
autorequire:
|
@@ -229,6 +229,7 @@ files:
|
|
229
229
|
- test/longrunning/tm_store.rb
|
230
230
|
- test/benchmark/tb_rw_vint.rb
|
231
231
|
- test/benchmark/tb_ram_store.rb
|
232
|
+
- test/unit/analysis/data/wordfile
|
232
233
|
- rake_utils/code_statistics.rb
|
233
234
|
test_files: []
|
234
235
|
rdoc_options:
|