ferret 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +10 -3
- data/Rakefile +2 -1
- data/TUTORIAL +3 -3
- data/lib/ferret.rb +1 -1
- data/lib/ferret/index/index.rb +1 -0
- data/lib/ferret/index/index_writer.rb +3 -2
- data/lib/ferret/query_parser.rb +2 -2
- data/test/unit/analysis/data/wordfile +6 -0
- data/test/unit/index/tc_index.rb +27 -2
- metadata +4 -3
data/README
CHANGED
@@ -87,10 +87,17 @@ only run on linux.
|
|
87
87
|
|
88
88
|
== Contact
|
89
89
|
|
90
|
-
|
91
|
-
the mailing list. More information on the list can be found at:
|
90
|
+
For bug reports and patches I have set up Trac here;
|
92
91
|
|
93
|
-
http://ferret.davebalmain.com/
|
92
|
+
http://ferret.davebalmain.com/trac
|
93
|
+
|
94
|
+
Queries, discussion etc should be addressed to the forum or mailing lists hosted
|
95
|
+
at;
|
96
|
+
|
97
|
+
http://rubyforge.org/projects/ferret/
|
98
|
+
|
99
|
+
Alternatively you could create a new page for discussion on the wiki at my Trac
|
100
|
+
page above. Or, if you're shy, please feel free to email me directly at dbalmain@gmail.com
|
94
101
|
|
95
102
|
Of course, since Ferret is almost a straight port of Java Lucene,
|
96
103
|
everything said about Lucene at http://jakarta.apache.org/lucene/ should
|
data/Rakefile
CHANGED
@@ -115,6 +115,7 @@ PKG_FILES = FileList[
|
|
115
115
|
'ext/**/*',
|
116
116
|
'lib/**/*.rb',
|
117
117
|
'test/**/*.rb',
|
118
|
+
'test/**/wordfile',
|
118
119
|
'rake_utils/**/*.rb',
|
119
120
|
'Rakefile'
|
120
121
|
]
|
@@ -170,7 +171,7 @@ else
|
|
170
171
|
|
171
172
|
s.author = "David Balmain"
|
172
173
|
s.email = "dbalmain@gmail.com"
|
173
|
-
s.homepage = "http://ferret.davebalmain.com"
|
174
|
+
s.homepage = "http://ferret.davebalmain.com/trac"
|
174
175
|
s.rubyforge_project = "ferret"
|
175
176
|
# if ENV['CERT_DIR']
|
176
177
|
# s.signing_key = File.join(ENV['CERT_DIR'], 'gem-private_key.pem')
|
data/TUTORIAL
CHANGED
@@ -74,21 +74,21 @@ Index#search_each. The first method returns a Ferret::Index::TopDocs object.
|
|
74
74
|
The second we'll show here. Lets say we wanted to find all documents with the
|
75
75
|
phrase "quick brown fox" in the content field. We'd write;
|
76
76
|
|
77
|
-
index.
|
77
|
+
index.search_each('content:"quick brown fox"') do |doc, score|
|
78
78
|
puts "Document #{doc} found with a score of #{score}"
|
79
79
|
end
|
80
80
|
|
81
81
|
But "fast" has a pretty similar meaning to "quick" and we don't mind if the
|
82
82
|
fox is a little red. So we could expand our search like this;
|
83
83
|
|
84
|
-
index.
|
84
|
+
index.search_each('content:"quick|fast brown|red fox"') do |doc, score|
|
85
85
|
puts "Document #{doc} found with a score of #{score}"
|
86
86
|
end
|
87
87
|
|
88
88
|
What if we want to find all documents entered on or after 5th of September,
|
89
89
|
2005 with the words "ruby" or "rails" in it. We could type something like;
|
90
90
|
|
91
|
-
index.
|
91
|
+
index.search_each('date:( >= 20050905) content:(ruby OR rails)') do |doc, score|
|
92
92
|
puts "Document #{doc} found with a score of #{score}"
|
93
93
|
end
|
94
94
|
|
data/lib/ferret.rb
CHANGED
data/lib/ferret/index/index.rb
CHANGED
@@ -94,11 +94,12 @@ module Index
|
|
94
94
|
else
|
95
95
|
begin
|
96
96
|
@segment_infos.read(@directory)
|
97
|
-
rescue
|
97
|
+
rescue Exception => e
|
98
98
|
if options[:create_if_missing]
|
99
99
|
@segment_infos.write(@directory)
|
100
100
|
else
|
101
|
-
|
101
|
+
@write_lock.release() # obtain write lock
|
102
|
+
raise e
|
102
103
|
end
|
103
104
|
end
|
104
105
|
end
|
data/lib/ferret/query_parser.rb
CHANGED
@@ -106,12 +106,12 @@ module Ferret
|
|
106
106
|
#
|
107
107
|
# Some examples;
|
108
108
|
#
|
109
|
-
# '+sport:ski -sport:snowboard sport:
|
109
|
+
# '+sport:ski -sport:snowboard sport:toboggen'
|
110
110
|
# '+ingredient:chocolate +ingredient:strawberries -ingredient:wheat'
|
111
111
|
#
|
112
112
|
# You may also use the boolean operators "AND", "&&", "OR" and "||". eg;
|
113
113
|
#
|
114
|
-
# 'sport:ski AND NOT sport:snowboard OR sport:
|
114
|
+
# 'sport:ski AND NOT sport:snowboard OR sport:toboggen'
|
115
115
|
# 'ingredient:chocolate AND ingredient:strawberries AND NOT ingredient:wheat'
|
116
116
|
#
|
117
117
|
# You can set the default operator when you create the query parse.
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -130,11 +130,36 @@ class IndexTest < Test::Unit::TestCase
|
|
130
130
|
|
131
131
|
def test_fs_index
|
132
132
|
fs_path = File.join(File.dirname(__FILE__), '../../temp/fsdir')
|
133
|
-
|
133
|
+
`rm -rf #{fs_path}`
|
134
|
+
assert_raise(Errno::ENOENT) {Index.new(:path => fs_path, :create_if_missing => false, :default_field => "def_field")}
|
135
|
+
index = Index.new(:path => fs_path, :default_field => "def_field")
|
134
136
|
do_test_index_with_array(index)
|
137
|
+
`rm -rf #{fs_path}`
|
135
138
|
index = Index.new(:path => fs_path, :create => true, :default_field => "def_field")
|
136
139
|
do_test_index_with_hash(index)
|
137
|
-
index = Index.new(:create => true, :default_field => "def_field")
|
140
|
+
index = Index.new(:path => fs_path, :create => true, :default_field => "def_field")
|
138
141
|
do_test_index_with_doc_array(index)
|
139
142
|
end
|
143
|
+
|
144
|
+
def test_fs_index_is_persistant
|
145
|
+
fs_path = File.join(File.dirname(__FILE__), '../../temp/fsdir')
|
146
|
+
`rm -rf #{fs_path}`
|
147
|
+
data = [
|
148
|
+
{"def_field" => "one two", :id => "me"},
|
149
|
+
{"def_field" => "one", :field2 => "three"},
|
150
|
+
{"def_field" => "two"},
|
151
|
+
{"def_field" => "one", :field2 => "four"},
|
152
|
+
{"def_field" => "one two"},
|
153
|
+
{"def_field" => "two", :field2 => "three", "field3" => "four"},
|
154
|
+
{"def_field" => "one"},
|
155
|
+
{"def_field" => "two", :field2 => "three", "field3" => "five"}
|
156
|
+
]
|
157
|
+
index = Index.new(:path => fs_path, :default_field => "def_field")
|
158
|
+
data.each {|doc| index << doc }
|
159
|
+
assert_equal(8, index.size)
|
160
|
+
index.close
|
161
|
+
index = Index.new(:path => fs_path, :default_field => "def_field")
|
162
|
+
assert_equal(8, index.size)
|
163
|
+
assert_equal("four", index[5]["field3"])
|
164
|
+
end
|
140
165
|
end
|
metadata
CHANGED
@@ -3,13 +3,13 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.
|
7
|
-
date: 2005-10-
|
6
|
+
version: 0.1.1
|
7
|
+
date: 2005-10-22 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
11
11
|
email: dbalmain@gmail.com
|
12
|
-
homepage: http://ferret.davebalmain.com
|
12
|
+
homepage: http://ferret.davebalmain.com/trac
|
13
13
|
rubyforge_project: ferret
|
14
14
|
description: Ferret is a port of the Java Lucene project. It is a powerful indexing and search library.
|
15
15
|
autorequire:
|
@@ -229,6 +229,7 @@ files:
|
|
229
229
|
- test/longrunning/tm_store.rb
|
230
230
|
- test/benchmark/tb_rw_vint.rb
|
231
231
|
- test/benchmark/tb_ram_store.rb
|
232
|
+
- test/unit/analysis/data/wordfile
|
232
233
|
- rake_utils/code_statistics.rb
|
233
234
|
test_files: []
|
234
235
|
rdoc_options:
|