ferret 0.10.6 → 0.10.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/analysis.c +136 -107
- data/ext/analysis.h +4 -0
- data/ext/bitvector.c +2 -2
- data/ext/bitvector.h +1 -1
- data/ext/compound_io.c +4 -4
- data/ext/defines.h +0 -2
- data/ext/filter.c +3 -3
- data/ext/fs_store.c +4 -4
- data/ext/hash.c +29 -18
- data/ext/hash.h +34 -16
- data/ext/hashset.c +6 -3
- data/ext/hashset.h +1 -1
- data/ext/index.c +22 -20
- data/ext/q_boolean.c +3 -3
- data/ext/q_const_score.c +1 -1
- data/ext/q_fuzzy.c +1 -1
- data/ext/q_match_all.c +1 -1
- data/ext/q_multi_term.c +2 -2
- data/ext/q_parser.c +21 -6
- data/ext/q_phrase.c +2 -2
- data/ext/q_prefix.c +1 -1
- data/ext/q_range.c +3 -3
- data/ext/q_span.c +8 -8
- data/ext/q_term.c +1 -1
- data/ext/q_wildcard.c +1 -1
- data/ext/r_analysis.c +10 -4
- data/ext/r_index.c +89 -12
- data/ext/r_qparser.c +67 -4
- data/ext/r_search.c +11 -1
- data/ext/r_store.c +51 -35
- data/ext/ram_store.c +18 -18
- data/ext/search.c +1 -1
- data/ext/search.h +25 -23
- data/ext/similarity.c +1 -1
- data/ext/sort.c +1 -1
- data/ext/store.c +22 -3
- data/ext/store.h +8 -2
- data/lib/ferret/index.rb +14 -4
- data/lib/ferret_version.rb +1 -1
- data/test/test_helper.rb +3 -0
- data/test/unit/analysis/tc_analyzer.rb +5 -5
- data/test/unit/analysis/tc_token_stream.rb +3 -3
- data/test/unit/index/tc_index_writer.rb +1 -1
- data/test/unit/query_parser/tc_query_parser.rb +7 -5
- data/test/unit/search/tc_filter.rb +1 -1
- data/test/unit/search/tc_fuzzy_query.rb +1 -1
- data/test/unit/search/tc_index_searcher.rb +1 -1
- data/test/unit/search/tc_multi_searcher.rb +1 -1
- data/test/unit/search/tc_search_and_sort.rb +1 -1
- data/test/unit/search/tc_spans.rb +1 -1
- metadata +4 -3
data/ext/store.h
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
#include <sys/types.h>
|
5
5
|
#include "global.h"
|
6
6
|
#include "hash.h"
|
7
|
+
#include "hashset.h"
|
7
8
|
#include "threading.h"
|
8
9
|
|
9
10
|
#define BUFFER_SIZE 1024
|
@@ -175,6 +176,8 @@ struct Store
|
|
175
176
|
CompoundStore *cmpd; /* for compound_store only */
|
176
177
|
} dir;
|
177
178
|
|
179
|
+
HashSet *locks;
|
180
|
+
|
178
181
|
/**
|
179
182
|
* Create the file +filename+ in the +store+.
|
180
183
|
*
|
@@ -299,7 +302,7 @@ struct Store
|
|
299
302
|
* @param store self
|
300
303
|
* @param lock the lock to obtain
|
301
304
|
*/
|
302
|
-
Lock *(*
|
305
|
+
Lock *(*open_lock_i)(Store *store, char *lockname);
|
303
306
|
|
304
307
|
/**
|
305
308
|
* Returns true if +lock+ is locked. To test if the file is locked:wq
|
@@ -307,7 +310,7 @@ struct Store
|
|
307
310
|
* @param lock the lock to test
|
308
311
|
* @raise IO_ERROR if there is an error detecting the lock status
|
309
312
|
*/
|
310
|
-
void (*
|
313
|
+
void (*close_lock_i)(Lock *lock);
|
311
314
|
|
312
315
|
/**
|
313
316
|
* Internal function to close the store freeing implementation specific
|
@@ -730,4 +733,7 @@ extern void is2os_copy_vints(InStream *is, OutStream *os, int cnt);
|
|
730
733
|
* @paran len the length of the buffer
|
731
734
|
*/
|
732
735
|
extern char *store_to_s(Store *store, char *buf, int buf_size);
|
736
|
+
|
737
|
+
extern Lock *open_lock(Store *store, char *lockname);
|
738
|
+
extern void close_lock(Lock *lock);
|
733
739
|
#endif
|
data/lib/ferret/index.rb
CHANGED
@@ -57,6 +57,10 @@ module Ferret::Index
|
|
57
57
|
# long to wait before retrying to obtain the
|
58
58
|
# commit lock when detecting if the IndexReader is
|
59
59
|
# at the latest version.
|
60
|
+
# close_dir:: Default: false. If you explicitly pass a
|
61
|
+
# Directory object to this class and you want
|
62
|
+
# Index to close it when it is closed itself then
|
63
|
+
# set this to true.
|
60
64
|
#
|
61
65
|
# Some examples;
|
62
66
|
#
|
@@ -86,10 +90,12 @@ module Ferret::Index
|
|
86
90
|
end
|
87
91
|
end
|
88
92
|
|
93
|
+
@close_dir = options[:close_dir]
|
89
94
|
if options[:dir].is_a?(String)
|
90
95
|
options[:path] = options[:dir]
|
91
96
|
end
|
92
97
|
if options[:path]
|
98
|
+
@close_dir = true
|
93
99
|
begin
|
94
100
|
@dir = FSDirectory.new(options[:path], options[:create])
|
95
101
|
rescue IOError => io
|
@@ -99,6 +105,7 @@ module Ferret::Index
|
|
99
105
|
@dir = options[:dir]
|
100
106
|
else
|
101
107
|
options[:create] = true # this should always be true for a new RAMDir
|
108
|
+
@close_dir = true
|
102
109
|
@dir = RAMDirectory.new
|
103
110
|
end
|
104
111
|
|
@@ -116,7 +123,7 @@ module Ferret::Index
|
|
116
123
|
@writer = nil
|
117
124
|
@reader = nil
|
118
125
|
|
119
|
-
@options.delete(:create) # only
|
126
|
+
@options.delete(:create) # only create the first time if at all
|
120
127
|
@auto_flush = @options[:auto_flush] || false
|
121
128
|
if (@options[:id_field].nil? and
|
122
129
|
@key.is_a?(Symbol))
|
@@ -183,7 +190,7 @@ module Ferret::Index
|
|
183
190
|
@searcher.close() if @searcher
|
184
191
|
@reader.close() if @reader
|
185
192
|
@writer.close() if @writer
|
186
|
-
@dir.close()
|
193
|
+
@dir.close() if @close_dir
|
187
194
|
|
188
195
|
@open = false
|
189
196
|
end
|
@@ -683,8 +690,11 @@ module Ferret::Index
|
|
683
690
|
if @qp.nil?
|
684
691
|
@qp = Ferret::QueryParser.new(@options)
|
685
692
|
end
|
686
|
-
# we need to set this
|
687
|
-
@qp.fields =
|
693
|
+
# we need to set this every time, in case a new field has been added
|
694
|
+
@qp.fields =
|
695
|
+
@reader.fields unless options[:all_fields] || options[:fields]
|
696
|
+
@qp.tokenized_fields =
|
697
|
+
@reader.tokenized_fields unless options[:tokenized_fields]
|
688
698
|
query = @qp.parse(query)
|
689
699
|
end
|
690
700
|
return query
|
data/lib/ferret_version.rb
CHANGED
data/test/test_helper.rb
CHANGED
@@ -2,6 +2,9 @@ $:.unshift File.dirname(__FILE__)
|
|
2
2
|
$:.unshift File.join(File.dirname(__FILE__), '../lib')
|
3
3
|
$:.unshift File.join(File.dirname(__FILE__), '../ext')
|
4
4
|
|
5
|
+
ENV['LANG'] = "en_US.UTF-8"
|
6
|
+
ENV['LC_CTYPE'] = "en_US.UTF-8"
|
7
|
+
|
5
8
|
class Float
|
6
9
|
def approx_eql?(o)
|
7
10
|
return (1 - self/o).abs < 0.0001
|
@@ -38,7 +38,7 @@ class AnalyzerTest < Test::Unit::TestCase
|
|
38
38
|
assert_equal(Token.new("ADDRESS", 39, 46), t.next())
|
39
39
|
assert(! t.next())
|
40
40
|
end
|
41
|
-
end if
|
41
|
+
end if (/mswin/i !~ RUBY_PLATFORM)
|
42
42
|
|
43
43
|
class AsciiLetterAnalyzerTest < Test::Unit::TestCase
|
44
44
|
include Ferret::Analysis
|
@@ -131,7 +131,7 @@ class LetterAnalyzerTest < Test::Unit::TestCase
|
|
131
131
|
assert_equal(Token.new("öîí", 80, 86), t.next)
|
132
132
|
assert(! t.next())
|
133
133
|
end
|
134
|
-
end if
|
134
|
+
end if (/mswin/i !~ RUBY_PLATFORM)
|
135
135
|
|
136
136
|
class AsciiWhiteSpaceAnalyzerTest < Test::Unit::TestCase
|
137
137
|
include Ferret::Analysis
|
@@ -214,7 +214,7 @@ class WhiteSpaceAnalyzerTest < Test::Unit::TestCase
|
|
214
214
|
assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
|
215
215
|
assert(! t.next())
|
216
216
|
end
|
217
|
-
end if
|
217
|
+
end if (/mswin/i !~ RUBY_PLATFORM)
|
218
218
|
|
219
219
|
class AsciiStandardAnalyzerTest < Test::Unit::TestCase
|
220
220
|
include Ferret::Analysis
|
@@ -350,7 +350,7 @@ class StandardAnalyzerTest < Test::Unit::TestCase
|
|
350
350
|
assert_equal(Token.new('öîí', 142, 148), t2.next)
|
351
351
|
assert(! t2.next())
|
352
352
|
end
|
353
|
-
end if
|
353
|
+
end if (/mswin/i !~ RUBY_PLATFORM)
|
354
354
|
|
355
355
|
class PerFieldAnalyzerTest < Test::Unit::TestCase
|
356
356
|
include Ferret::Analysis
|
@@ -545,4 +545,4 @@ class CustomAnalyzerTest < Test::Unit::TestCase
|
|
545
545
|
assert_equal(Token.new("dêbater", 36, 44), t.next)
|
546
546
|
assert(! t.next())
|
547
547
|
end
|
548
|
-
end if
|
548
|
+
end if (/mswin/i !~ RUBY_PLATFORM)
|
@@ -109,7 +109,7 @@ class LetterTokenizerTest < Test::Unit::TestCase
|
|
109
109
|
assert_equal(Token.new('öîí', 80, 86), t.next)
|
110
110
|
assert(! t.next())
|
111
111
|
end
|
112
|
-
end if
|
112
|
+
end if (/mswin/i !~ RUBY_PLATFORM)
|
113
113
|
|
114
114
|
class AsciiWhiteSpaceTokenizerTest < Test::Unit::TestCase
|
115
115
|
include Ferret::Analysis
|
@@ -186,7 +186,7 @@ class WhiteSpaceTokenizerTest < Test::Unit::TestCase
|
|
186
186
|
assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
|
187
187
|
assert(! t.next())
|
188
188
|
end
|
189
|
-
end if
|
189
|
+
end if (/mswin/i !~ RUBY_PLATFORM)
|
190
190
|
|
191
191
|
class AsciiStandardTokenizerTest < Test::Unit::TestCase
|
192
192
|
include Ferret::Analysis
|
@@ -275,7 +275,7 @@ class StandardTokenizerTest < Test::Unit::TestCase
|
|
275
275
|
assert_equal(Token.new('www.davebalmain.com/trac-site', 25, 61), t.next)
|
276
276
|
assert(! t.next())
|
277
277
|
end
|
278
|
-
end if
|
278
|
+
end if (/mswin/i !~ RUBY_PLATFORM)
|
279
279
|
|
280
280
|
class RegExpTokenizerTest < Test::Unit::TestCase
|
281
281
|
include Ferret::Analysis
|
@@ -5,7 +5,8 @@ class QueryParserTest < Test::Unit::TestCase
|
|
5
5
|
|
6
6
|
def test_strings()
|
7
7
|
parser = Ferret::QueryParser.new(:default_field => "xxx",
|
8
|
-
:fields => ["xxx", "field", "f1", "f2"]
|
8
|
+
:fields => ["xxx", "field", "f1", "f2"],
|
9
|
+
:tokenized_fields => ["xxx", "f1", "f2"])
|
9
10
|
pairs = [
|
10
11
|
['', ''],
|
11
12
|
['*:word', 'word field:word f1:word f2:word'],
|
@@ -14,12 +15,13 @@ class QueryParserTest < Test::Unit::TestCase
|
|
14
15
|
['"word1 word2 word#"', '"word1 word2 word"'],
|
15
16
|
['"word1 %%% word3"', '"word1 <> word3"~1'],
|
16
17
|
['field:"one two three"', 'field:"one two three"'],
|
17
|
-
['field:"one %%% three"', 'field:"one
|
18
|
+
['field:"one %%% three"', 'field:"one %%% three"'],
|
19
|
+
['f1:"one %%% three"', 'f1:"one <> three"~1'],
|
18
20
|
['field:"one <> three"', 'field:"one <> three"'],
|
19
21
|
['field:"one <> three <>"', 'field:"one <> three"'],
|
20
22
|
['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
|
21
|
-
['field:"one <>
|
22
|
-
['field:"
|
23
|
+
['field:"one <> 222 <> three|four|five <>"', 'field:"one <> 222 <> three|four|five"'],
|
24
|
+
['field:"on1|tw2 THREE|four|five six|seven"', 'field:"on1|tw2 THREE|four|five six|seven"'],
|
23
25
|
['field:"testing|trucks"', 'field:testing field:trucks'],
|
24
26
|
['[aaa bbb]', '[aaa bbb]'],
|
25
27
|
['{aaa bbb]', '{aaa bbb]'],
|
@@ -27,7 +29,7 @@ class QueryParserTest < Test::Unit::TestCase
|
|
27
29
|
['{aaa bbb}', '{aaa bbb}'],
|
28
30
|
['{aaa>', '{aaa>'],
|
29
31
|
['[aaa>', '[aaa>'],
|
30
|
-
['field:<
|
32
|
+
['field:<a\ aa}', 'field:<a aa}'],
|
31
33
|
['<aaa]', '<aaa]'],
|
32
34
|
['>aaa', '{aaa>'],
|
33
35
|
['>=aaa', '[aaa>'],
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.
|
2
|
+
rubygems_version: 0.9.0
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.10.
|
7
|
-
date: 2006-09-
|
6
|
+
version: 0.10.7
|
7
|
+
date: 2006-09-24 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -25,6 +25,7 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
|
|
25
25
|
platform: ruby
|
26
26
|
signing_key:
|
27
27
|
cert_chain:
|
28
|
+
post_install_message:
|
28
29
|
authors:
|
29
30
|
- David Balmain
|
30
31
|
files:
|