ferret 0.11.3 → 0.11.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +7 -1
- data/bin/ferret-browser +79 -0
- data/ext/analysis.c +5 -2
- data/ext/config.h +2 -1
- data/ext/ferret.c +32 -7
- data/ext/ferret.h +1 -0
- data/ext/index.c +69 -48
- data/ext/q_boolean.c +21 -7
- data/ext/q_parser.c +203 -113
- data/ext/q_span.c +2 -1
- data/ext/r_analysis.c +14 -1
- data/ext/r_index.c +179 -0
- data/ext/r_search.c +12 -30
- data/ext/search.c +1 -0
- data/ext/search.h +4 -0
- data/ext/store.c +24 -0
- data/ext/store.h +14 -0
- data/lib/ferret/browser.rb +246 -0
- data/lib/ferret/browser/s/global.js +192 -0
- data/lib/ferret/browser/s/style.css +148 -0
- data/lib/ferret/browser/views/document/list.rhtml +49 -0
- data/lib/ferret/browser/views/document/show.rhtml +27 -0
- data/lib/ferret/browser/views/error/index.rhtml +7 -0
- data/lib/ferret/browser/views/help/index.rhtml +8 -0
- data/lib/ferret/browser/views/home/index.rhtml +29 -0
- data/lib/ferret/browser/views/layout.rhtml +22 -0
- data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
- data/lib/ferret/browser/views/term/index.rhtml +199 -0
- data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
- data/lib/ferret/browser/webrick.rb +14 -0
- data/lib/ferret/index.rb +67 -36
- data/lib/ferret_version.rb +1 -1
- data/test/unit/analysis/tc_analyzer.rb +5 -5
- data/test/unit/analysis/tc_token_stream.rb +4 -4
- data/test/unit/index/tc_index.rb +1 -1
- data/test/unit/index/tc_index_reader.rb +37 -0
- data/test/unit/search/tc_spans.rb +18 -1
- metadata +18 -5
@@ -0,0 +1 @@
|
|
1
|
+
<%= @reader.term_positions_for(@field, @term).to_json(:fast) %>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module WEBrick
|
2
|
+
class FerretBrowserHandler < WEBrick::HTTPServlet::AbstractServlet
|
3
|
+
# Creates a FerretBrowserHandler, which hands requests to a
|
4
|
+
# Ferret::Browser::Delegator built from +reader+ and +path+.
|
5
|
+
def initialize(server, reader, path)
|
6
|
+
super(server)
|
7
|
+
@delegator = Ferret::Browser::Delegator.new(reader, path)
|
8
|
+
end
|
9
|
+
# Handler for WEBrick requests (also aliased as do_POST).
|
10
|
+
def do_GET(req, res)
|
11
|
+
res.status, res.content_type, res.body = @delegator.run(req.meta_vars)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/lib/ferret/index.rb
CHANGED
@@ -387,40 +387,54 @@ module Ferret::Index
|
|
387
387
|
end
|
388
388
|
end
|
389
389
|
|
390
|
-
#
|
391
|
-
#
|
390
|
+
# Retrieves a document/documents from the index. The method for retrieval
|
391
|
+
# depends on the type of the argument passed.
|
392
392
|
#
|
393
|
-
#
|
394
|
-
#
|
395
|
-
|
393
|
+
# If +arg+ is an Integer then return the document based on the internal
|
394
|
+
# document number.
|
395
|
+
#
|
396
|
+
# If +arg+ is a Range, then return the documents within the range based on
|
397
|
+
# internal document number.
|
398
|
+
#
|
399
|
+
# If +arg+ is a String then search for the first document with +arg+ in
|
400
|
+
# the +id+ field. The +id+ field is either :id or whatever you set
|
401
|
+
# :id_field parameter to when you create the Index object.
|
402
|
+
def doc(*arg)
|
396
403
|
@dir.synchronize do
|
397
|
-
|
398
|
-
id = args[0]
|
404
|
+
id = arg[0]
|
399
405
|
if id.kind_of?(String) or id.kind_of?(Symbol)
|
406
|
+
ensure_reader_open()
|
400
407
|
term_doc_enum = @reader.term_docs_for(@id_field, id.to_s)
|
401
408
|
return term_doc_enum.next? ? @reader[term_doc_enum.doc] : nil
|
409
|
+
else
|
410
|
+
ensure_reader_open(false)
|
411
|
+
return @reader[*arg]
|
402
412
|
end
|
403
|
-
return @reader[*args]
|
404
413
|
end
|
405
414
|
end
|
406
415
|
alias :[] :doc
|
407
416
|
|
408
|
-
#
|
409
|
-
#
|
410
|
-
#
|
417
|
+
# Deletes a document/documents from the index. The method for determining
|
418
|
+
# the document to delete depends on the type of the argument passed.
|
419
|
+
#
|
420
|
+
# If +arg+ is an Integer then delete the document based on the internal
|
421
|
+
# document number. Will raise an error if the document does not exist.
|
411
422
|
#
|
412
|
-
#
|
413
|
-
|
423
|
+
# If +arg+ is a String then search for the documents with +arg+ in the
|
424
|
+
# +id+ field. The +id+ field is either :id or whatever you set :id_field
|
425
|
+
# parameter to when you create the Index object. Will fail quietly if
|
426
|
+
# no document exists.
|
427
|
+
def delete(arg)
|
414
428
|
@dir.synchrolock do
|
415
429
|
ensure_writer_open()
|
416
|
-
if
|
430
|
+
if arg.is_a?(String) or arg.is_a?(Symbol)
|
417
431
|
ensure_writer_open()
|
418
|
-
@writer.delete(@id_field,
|
419
|
-
elsif
|
432
|
+
@writer.delete(@id_field, arg.to_s)
|
433
|
+
elsif arg.is_a?(Integer)
|
420
434
|
ensure_reader_open()
|
421
|
-
cnt = @reader.delete(
|
435
|
+
cnt = @reader.delete(arg)
|
422
436
|
else
|
423
|
-
raise ArgumentError, "Cannot delete for
|
437
|
+
raise ArgumentError, "Cannot delete for arg of type #{arg.class}"
|
424
438
|
end
|
425
439
|
flush() if @auto_flush
|
426
440
|
end
|
@@ -537,12 +551,15 @@ module Ferret::Index
|
|
537
551
|
# index.
|
538
552
|
def flush()
|
539
553
|
@dir.synchronize do
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
554
|
+
if @reader
|
555
|
+
if @searcher
|
556
|
+
@searcher.close
|
557
|
+
@searcher = nil
|
558
|
+
end
|
559
|
+
@reader.commit
|
560
|
+
elsif @writer
|
561
|
+
@writer.commit
|
562
|
+
end
|
546
563
|
end
|
547
564
|
end
|
548
565
|
alias :commit :flush
|
@@ -614,7 +631,7 @@ module Ferret::Index
|
|
614
631
|
# false.
|
615
632
|
def persist(directory, create = true)
|
616
633
|
synchronize do
|
617
|
-
|
634
|
+
close_all()
|
618
635
|
old_dir = @dir
|
619
636
|
if directory.is_a?(String)
|
620
637
|
@dir = FSDirectory.new(directory, create)
|
@@ -684,19 +701,22 @@ module Ferret::Index
|
|
684
701
|
end
|
685
702
|
|
686
703
|
# returns the new reader if one is opened
|
687
|
-
def ensure_reader_open()
|
704
|
+
def ensure_reader_open(get_latest = true)
|
688
705
|
raise "tried to use a closed index" if not @open
|
689
706
|
if @reader
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
707
|
+
if get_latest
|
708
|
+
latest = false
|
709
|
+
begin
|
710
|
+
latest = @reader.latest?
|
711
|
+
rescue Lock::LockError => le
|
712
|
+
sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
|
713
|
+
latest = @reader.latest?
|
714
|
+
end
|
715
|
+
if not latest
|
716
|
+
@searcher.close if @searcher
|
717
|
+
@reader.close
|
718
|
+
return @reader = IndexReader.new(@dir)
|
719
|
+
end
|
700
720
|
end
|
701
721
|
else
|
702
722
|
if @writer
|
@@ -737,6 +757,17 @@ module Ferret::Index
|
|
737
757
|
|
738
758
|
return @searcher.search(query, options)
|
739
759
|
end
|
760
|
+
|
761
|
+
def close_all()
|
762
|
+
@dir.synchronize do
|
763
|
+
@searcher.close if @searcher
|
764
|
+
@reader.close if @reader
|
765
|
+
@writer.close if @writer
|
766
|
+
@reader = nil
|
767
|
+
@searcher = nil
|
768
|
+
@writer = nil
|
769
|
+
end
|
770
|
+
end
|
740
771
|
end
|
741
772
|
end
|
742
773
|
|
data/lib/ferret_version.rb
CHANGED
@@ -38,7 +38,7 @@ class AnalyzerTest < Test::Unit::TestCase
|
|
38
38
|
assert_equal(Token.new("ADDRESS", 39, 46), t.next())
|
39
39
|
assert(! t.next())
|
40
40
|
end
|
41
|
-
end if (/utf-8/i
|
41
|
+
end if (/utf-8/i =~ Ferret.locale)
|
42
42
|
|
43
43
|
class AsciiLetterAnalyzerTest < Test::Unit::TestCase
|
44
44
|
include Ferret::Analysis
|
@@ -131,7 +131,7 @@ class LetterAnalyzerTest < Test::Unit::TestCase
|
|
131
131
|
assert_equal(Token.new("öîí", 80, 86), t.next)
|
132
132
|
assert(! t.next())
|
133
133
|
end
|
134
|
-
end if (/utf-8/i
|
134
|
+
end if (/utf-8/i =~ Ferret.locale)
|
135
135
|
|
136
136
|
class AsciiWhiteSpaceAnalyzerTest < Test::Unit::TestCase
|
137
137
|
include Ferret::Analysis
|
@@ -214,7 +214,7 @@ class WhiteSpaceAnalyzerTest < Test::Unit::TestCase
|
|
214
214
|
assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
|
215
215
|
assert(! t.next())
|
216
216
|
end
|
217
|
-
end if (/utf-8/i
|
217
|
+
end if (/utf-8/i =~ Ferret.locale)
|
218
218
|
|
219
219
|
class AsciiStandardAnalyzerTest < Test::Unit::TestCase
|
220
220
|
include Ferret::Analysis
|
@@ -350,7 +350,7 @@ class StandardAnalyzerTest < Test::Unit::TestCase
|
|
350
350
|
assert_equal(Token.new('öîí', 142, 148), t2.next)
|
351
351
|
assert(! t2.next())
|
352
352
|
end
|
353
|
-
end if (/utf-8/i
|
353
|
+
end if (/utf-8/i =~ Ferret.locale)
|
354
354
|
|
355
355
|
class PerFieldAnalyzerTest < Test::Unit::TestCase
|
356
356
|
include Ferret::Analysis
|
@@ -545,4 +545,4 @@ class CustomAnalyzerTest < Test::Unit::TestCase
|
|
545
545
|
assert_equal(Token.new("dêbater", 36, 44), t.next)
|
546
546
|
assert(! t.next())
|
547
547
|
end
|
548
|
-
end if (/utf-8/i
|
548
|
+
end if (/utf-8/i =~ Ferret.locale)
|
@@ -109,7 +109,7 @@ class LetterTokenizerTest < Test::Unit::TestCase
|
|
109
109
|
assert_equal(Token.new('öîí', 80, 86), t.next)
|
110
110
|
assert(! t.next())
|
111
111
|
end
|
112
|
-
end if (/utf-8/i
|
112
|
+
end if (/utf-8/i =~ Ferret.locale)
|
113
113
|
|
114
114
|
class AsciiWhiteSpaceTokenizerTest < Test::Unit::TestCase
|
115
115
|
include Ferret::Analysis
|
@@ -186,7 +186,7 @@ class WhiteSpaceTokenizerTest < Test::Unit::TestCase
|
|
186
186
|
assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
|
187
187
|
assert(! t.next())
|
188
188
|
end
|
189
|
-
end if (/utf-8/i
|
189
|
+
end if (/utf-8/i =~ Ferret.locale)
|
190
190
|
|
191
191
|
class AsciiStandardTokenizerTest < Test::Unit::TestCase
|
192
192
|
include Ferret::Analysis
|
@@ -275,7 +275,7 @@ class StandardTokenizerTest < Test::Unit::TestCase
|
|
275
275
|
assert_equal(Token.new('www.davebalmain.com/trac-site', 25, 61), t.next)
|
276
276
|
assert(! t.next())
|
277
277
|
end
|
278
|
-
end if (/utf-8/i
|
278
|
+
end if (/utf-8/i =~ Ferret.locale)
|
279
279
|
|
280
280
|
class RegExpTokenizerTest < Test::Unit::TestCase
|
281
281
|
include Ferret::Analysis
|
@@ -428,7 +428,7 @@ END
|
|
428
428
|
assert_equal(Token.new('szzzt', 256, 264), t.next)
|
429
429
|
assert(! t.next())
|
430
430
|
end
|
431
|
-
end if (/utf-8/i
|
431
|
+
end if (/utf-8/i =~ Ferret.locale)
|
432
432
|
|
433
433
|
class StopFilterTest < Test::Unit::TestCase
|
434
434
|
include Ferret::Analysis
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -150,7 +150,7 @@ class IndexTest < Test::Unit::TestCase
|
|
150
150
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
151
151
|
|
152
152
|
Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
|
153
|
-
assert_raise(FileNotFoundError) do
|
153
|
+
assert_raise(Ferret::FileNotFoundError) do
|
154
154
|
Index.new(:path => fs_path,
|
155
155
|
:create_if_missing => false,
|
156
156
|
:default_field => :xxx)
|
@@ -31,6 +31,11 @@ module IndexReaderCommon
|
|
31
31
|
def do_test_term_enum()
|
32
32
|
te = @ir.terms(:author)
|
33
33
|
|
34
|
+
assert_equal('[{"term":"Leo","frequency":1},{"term":"Tolstoy","frequency":1}]', te.to_json);
|
35
|
+
te.field = :author
|
36
|
+
assert_equal('[["Leo",1],["Tolstoy",1]]', te.to_json(:fast));
|
37
|
+
te.field = :author
|
38
|
+
|
34
39
|
assert(te.next?)
|
35
40
|
assert_equal("Leo", te.term)
|
36
41
|
assert_equal(1, te.doc_freq)
|
@@ -99,6 +104,11 @@ module IndexReaderCommon
|
|
99
104
|
end
|
100
105
|
assert(! tde.next?)
|
101
106
|
|
107
|
+
tde = @ir.term_docs_for(:body, "Wally")
|
108
|
+
assert_equal('[{"document":0,"frequency":1},{"document":5,"frequency":1},{"document":18,"frequency":3},{"document":20,"frequency":6}]', tde.to_json)
|
109
|
+
tde = @ir.term_docs_for(:body, "Wally")
|
110
|
+
assert_equal('[[0,1],[5,1],[18,3],[20,6]]', tde.to_json(:fast))
|
111
|
+
|
102
112
|
do_test_term_docpos_enum_skip_to(tde)
|
103
113
|
|
104
114
|
# test term positions
|
@@ -124,6 +134,33 @@ module IndexReaderCommon
|
|
124
134
|
assert_nil(tde.next_position())
|
125
135
|
assert(! tde.next?)
|
126
136
|
|
137
|
+
tde = @ir.term_positions_for(:body, "read")
|
138
|
+
assert_equal('[' +
|
139
|
+
'{"document":1,"frequency":1,"positions":[3]},' +
|
140
|
+
'{"document":2,"frequency":2,"positions":[1,4]},' +
|
141
|
+
'{"document":6,"frequency":4,"positions":[3,4,5,6]},' +
|
142
|
+
'{"document":9,"frequency":3,"positions":[0,4,13]},' +
|
143
|
+
'{"document":10,"frequency":1,"positions":[1]},' +
|
144
|
+
'{"document":16,"frequency":2,"positions":[2,3]},' +
|
145
|
+
'{"document":17,"frequency":1,"positions":[2]},' +
|
146
|
+
'{"document":20,"frequency":1,"positions":[21]},' +
|
147
|
+
'{"document":21,"frequency":6,"positions":[3,4,5,8,9,10]}]',
|
148
|
+
tde.to_json())
|
149
|
+
tde = @ir.term_positions_for(:body, "read")
|
150
|
+
assert_equal('[' +
|
151
|
+
'[1,1,[3]],' +
|
152
|
+
'[2,2,[1,4]],' +
|
153
|
+
'[6,4,[3,4,5,6]],' +
|
154
|
+
'[9,3,[0,4,13]],' +
|
155
|
+
'[10,1,[1]],' +
|
156
|
+
'[16,2,[2,3]],' +
|
157
|
+
'[17,1,[2]],' +
|
158
|
+
'[20,1,[21]],' +
|
159
|
+
'[21,6,[3,4,5,8,9,10]]]',
|
160
|
+
tde.to_json(:fast))
|
161
|
+
|
162
|
+
tde = @ir.term_positions_for(:body, "read")
|
163
|
+
|
127
164
|
do_test_term_docpos_enum_skip_to(tde)
|
128
165
|
end
|
129
166
|
|
@@ -68,7 +68,7 @@ class SpansBasicTest < Test::Unit::TestCase
|
|
68
68
|
end
|
69
69
|
|
70
70
|
def check_hits(query, expected, test_explain = false, top=nil)
|
71
|
-
top_docs = @searcher.search(query, {:limit => expected.length})
|
71
|
+
top_docs = @searcher.search(query, {:limit => expected.length + 1})
|
72
72
|
assert_equal(expected.length, top_docs.hits.size)
|
73
73
|
assert_equal(top, top_docs.hits[0].doc) if top
|
74
74
|
assert_equal(expected.length, top_docs.total_hits)
|
@@ -170,4 +170,21 @@ class SpansBasicTest < Test::Unit::TestCase
|
|
170
170
|
q = SpanOrQuery.new([nearq1, nearq2])
|
171
171
|
check_hits(q, [0,3,4,5,6,8,9,10,11,14,16,30], false)
|
172
172
|
end
|
173
|
+
|
174
|
+
def test_span_prefix_query_max_terms
|
175
|
+
@dir = RAMDirectory.new
|
176
|
+
iw = IndexWriter.new(:dir => @dir,
|
177
|
+
:analyzer => WhiteSpaceAnalyzer.new())
|
178
|
+
2000.times { |i| iw << {:field => "prefix#{i} term#{i}"} }
|
179
|
+
iw.close()
|
180
|
+
@searcher = Searcher.new(@dir)
|
181
|
+
|
182
|
+
pq = SpanPrefixQuery.new(:field, "prefix")
|
183
|
+
tq = SpanTermQuery.new(:field, "term1500")
|
184
|
+
q = SpanNearQuery.new(:clauses => [pq, tq], :in_order => true)
|
185
|
+
check_hits(q, [], false)
|
186
|
+
pq = SpanPrefixQuery.new(:field, "prefix", 2000)
|
187
|
+
q = SpanNearQuery.new(:clauses => [pq, tq], :in_order => true)
|
188
|
+
check_hits(q, [1500], false)
|
189
|
+
end
|
173
190
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.11.
|
7
|
-
date: 2007-
|
6
|
+
version: 0.11.4
|
7
|
+
date: 2007-04-07 00:00:00 +10:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -13,7 +13,7 @@ homepage: http://ferret.davebalmain.com/trac
|
|
13
13
|
rubyforge_project: ferret
|
14
14
|
description: Ferret is a port of the Java Lucene project. It is a powerful indexing and search library.
|
15
15
|
autorequire: ferret
|
16
|
-
default_executable:
|
16
|
+
default_executable: ferret-browser
|
17
17
|
bindir: bin
|
18
18
|
has_rdoc: true
|
19
19
|
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
@@ -168,7 +168,20 @@ files:
|
|
168
168
|
- lib/ferret/field_infos.rb
|
169
169
|
- lib/ferret/document.rb
|
170
170
|
- lib/ferret/index.rb
|
171
|
+
- lib/ferret/browser.rb
|
171
172
|
- lib/ferret/number_tools.rb
|
173
|
+
- lib/ferret/browser/webrick.rb
|
174
|
+
- lib/ferret/browser/views/layout.rhtml
|
175
|
+
- lib/ferret/browser/views/term/index.rhtml
|
176
|
+
- lib/ferret/browser/views/term/termdocs.rhtml
|
177
|
+
- lib/ferret/browser/views/home/index.rhtml
|
178
|
+
- lib/ferret/browser/views/help/index.rhtml
|
179
|
+
- lib/ferret/browser/views/document/list.rhtml
|
180
|
+
- lib/ferret/browser/views/document/show.rhtml
|
181
|
+
- lib/ferret/browser/views/error/index.rhtml
|
182
|
+
- lib/ferret/browser/views/term-vector/index.rhtml
|
183
|
+
- lib/ferret/browser/s/style.css
|
184
|
+
- lib/ferret/browser/s/global.js
|
172
185
|
- test/test_helper.rb
|
173
186
|
- test/test_all.rb
|
174
187
|
- test/unit/tc_document.rb
|
@@ -227,8 +240,8 @@ extra_rdoc_files:
|
|
227
240
|
- ext/r_index.c
|
228
241
|
- ext/r_store.c
|
229
242
|
- ext/ferret.c
|
230
|
-
executables:
|
231
|
-
|
243
|
+
executables:
|
244
|
+
- ferret-browser
|
232
245
|
extensions:
|
233
246
|
- ext/extconf.rb
|
234
247
|
requirements: []
|