ferret 0.11.3 → 0.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +7 -1
- data/bin/ferret-browser +79 -0
- data/ext/analysis.c +5 -2
- data/ext/config.h +2 -1
- data/ext/ferret.c +32 -7
- data/ext/ferret.h +1 -0
- data/ext/index.c +69 -48
- data/ext/q_boolean.c +21 -7
- data/ext/q_parser.c +203 -113
- data/ext/q_span.c +2 -1
- data/ext/r_analysis.c +14 -1
- data/ext/r_index.c +179 -0
- data/ext/r_search.c +12 -30
- data/ext/search.c +1 -0
- data/ext/search.h +4 -0
- data/ext/store.c +24 -0
- data/ext/store.h +14 -0
- data/lib/ferret/browser.rb +246 -0
- data/lib/ferret/browser/s/global.js +192 -0
- data/lib/ferret/browser/s/style.css +148 -0
- data/lib/ferret/browser/views/document/list.rhtml +49 -0
- data/lib/ferret/browser/views/document/show.rhtml +27 -0
- data/lib/ferret/browser/views/error/index.rhtml +7 -0
- data/lib/ferret/browser/views/help/index.rhtml +8 -0
- data/lib/ferret/browser/views/home/index.rhtml +29 -0
- data/lib/ferret/browser/views/layout.rhtml +22 -0
- data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
- data/lib/ferret/browser/views/term/index.rhtml +199 -0
- data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
- data/lib/ferret/browser/webrick.rb +14 -0
- data/lib/ferret/index.rb +67 -36
- data/lib/ferret_version.rb +1 -1
- data/test/unit/analysis/tc_analyzer.rb +5 -5
- data/test/unit/analysis/tc_token_stream.rb +4 -4
- data/test/unit/index/tc_index.rb +1 -1
- data/test/unit/index/tc_index_reader.rb +37 -0
- data/test/unit/search/tc_spans.rb +18 -1
- metadata +18 -5
@@ -0,0 +1 @@
|
|
1
|
+
<%= @reader.term_positions_for(@field, @term).to_json(:fast) %>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module WEBrick
|
2
|
+
class FerretBrowserHandler < WEBrick::HTTPServlet::AbstractServlet
|
3
|
+
# Creates a FerretBrowserHandler, which answers for the application
|
4
|
+
# using the given +reader+ and +path+.
|
5
|
+
def initialize(server, reader, path)
|
6
|
+
super(server)
|
7
|
+
@delegator = Ferret::Browser::Delegator.new(reader, path)
|
8
|
+
end
|
9
|
+
# Handler for WEBrick GET requests.
|
10
|
+
def do_GET(req, res)
|
11
|
+
res.status, res.content_type, res.body = @delegator.run(req.meta_vars)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/lib/ferret/index.rb
CHANGED
@@ -387,40 +387,54 @@ module Ferret::Index
|
|
387
387
|
end
|
388
388
|
end
|
389
389
|
|
390
|
-
#
|
391
|
-
#
|
390
|
+
# Retrieves a document/documents from the index. The method for retrieval
|
391
|
+
# depends on the type of the argument passed.
|
392
392
|
#
|
393
|
-
#
|
394
|
-
#
|
395
|
-
|
393
|
+
# If +arg+ is an Integer then return the document based on the internal
|
394
|
+
# document number.
|
395
|
+
#
|
396
|
+
# If +arg+ is a Range, then return the documents within the range based on
|
397
|
+
# internal document number.
|
398
|
+
#
|
399
|
+
# If +arg+ is a String then search for the first document with +arg+ in
|
400
|
+
# the +id+ field. The +id+ field is either :id or whatever you set
|
401
|
+
# the :id_field parameter to when you create the Index object.
|
402
|
+
def doc(*arg)
|
396
403
|
@dir.synchronize do
|
397
|
-
|
398
|
-
id = args[0]
|
404
|
+
id = arg[0]
|
399
405
|
if id.kind_of?(String) or id.kind_of?(Symbol)
|
406
|
+
ensure_reader_open()
|
400
407
|
term_doc_enum = @reader.term_docs_for(@id_field, id.to_s)
|
401
408
|
return term_doc_enum.next? ? @reader[term_doc_enum.doc] : nil
|
409
|
+
else
|
410
|
+
ensure_reader_open(false)
|
411
|
+
return @reader[*arg]
|
402
412
|
end
|
403
|
-
return @reader[*args]
|
404
413
|
end
|
405
414
|
end
|
406
415
|
alias :[] :doc
|
407
416
|
|
408
|
-
#
|
409
|
-
#
|
410
|
-
#
|
417
|
+
# Deletes a document/documents from the index. The method for determining
|
418
|
+
# the document to delete depends on the type of the argument passed.
|
419
|
+
#
|
420
|
+
# If +arg+ is an Integer then delete the document based on the internal
|
421
|
+
# document number. Will raise an error if the document does not exist.
|
411
422
|
#
|
412
|
-
#
|
413
|
-
|
423
|
+
# If +arg+ is a String then search for the documents with +arg+ in the
|
424
|
+
# +id+ field. The +id+ field is either :id or whatever you set the :id_field
|
425
|
+
# parameter to when you create the Index object. Will fail quietly if the
|
426
|
+
# document does not exist.
|
427
|
+
def delete(arg)
|
414
428
|
@dir.synchrolock do
|
415
429
|
ensure_writer_open()
|
416
|
-
if
|
430
|
+
if arg.is_a?(String) or arg.is_a?(Symbol)
|
417
431
|
ensure_writer_open()
|
418
|
-
@writer.delete(@id_field,
|
419
|
-
elsif
|
432
|
+
@writer.delete(@id_field, arg.to_s)
|
433
|
+
elsif arg.is_a?(Integer)
|
420
434
|
ensure_reader_open()
|
421
|
-
cnt = @reader.delete(
|
435
|
+
cnt = @reader.delete(arg)
|
422
436
|
else
|
423
|
-
raise ArgumentError, "Cannot delete for
|
437
|
+
raise ArgumentError, "Cannot delete for arg of type #{arg.class}"
|
424
438
|
end
|
425
439
|
flush() if @auto_flush
|
426
440
|
end
|
@@ -537,12 +551,15 @@ module Ferret::Index
|
|
537
551
|
# index.
|
538
552
|
def flush()
|
539
553
|
@dir.synchronize do
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
554
|
+
if @reader
|
555
|
+
if @searcher
|
556
|
+
@searcher.close
|
557
|
+
@searcher = nil
|
558
|
+
end
|
559
|
+
@reader.commit
|
560
|
+
elsif @writer
|
561
|
+
@writer.commit
|
562
|
+
end
|
546
563
|
end
|
547
564
|
end
|
548
565
|
alias :commit :flush
|
@@ -614,7 +631,7 @@ module Ferret::Index
|
|
614
631
|
# false.
|
615
632
|
def persist(directory, create = true)
|
616
633
|
synchronize do
|
617
|
-
|
634
|
+
close_all()
|
618
635
|
old_dir = @dir
|
619
636
|
if directory.is_a?(String)
|
620
637
|
@dir = FSDirectory.new(directory, create)
|
@@ -684,19 +701,22 @@ module Ferret::Index
|
|
684
701
|
end
|
685
702
|
|
686
703
|
# returns the new reader if one is opened
|
687
|
-
def ensure_reader_open()
|
704
|
+
def ensure_reader_open(get_latest = true)
|
688
705
|
raise "tried to use a closed index" if not @open
|
689
706
|
if @reader
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
707
|
+
if get_latest
|
708
|
+
latest = false
|
709
|
+
begin
|
710
|
+
latest = @reader.latest?
|
711
|
+
rescue Lock::LockError => le
|
712
|
+
sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
|
713
|
+
latest = @reader.latest?
|
714
|
+
end
|
715
|
+
if not latest
|
716
|
+
@searcher.close if @searcher
|
717
|
+
@reader.close
|
718
|
+
return @reader = IndexReader.new(@dir)
|
719
|
+
end
|
700
720
|
end
|
701
721
|
else
|
702
722
|
if @writer
|
@@ -737,6 +757,17 @@ module Ferret::Index
|
|
737
757
|
|
738
758
|
return @searcher.search(query, options)
|
739
759
|
end
|
760
|
+
|
761
|
+
def close_all()
|
762
|
+
@dir.synchronize do
|
763
|
+
@searcher.close if @searcher
|
764
|
+
@reader.close if @reader
|
765
|
+
@writer.close if @writer
|
766
|
+
@reader = nil
|
767
|
+
@searcher = nil
|
768
|
+
@writer = nil
|
769
|
+
end
|
770
|
+
end
|
740
771
|
end
|
741
772
|
end
|
742
773
|
|
data/lib/ferret_version.rb
CHANGED
@@ -38,7 +38,7 @@ class AnalyzerTest < Test::Unit::TestCase
|
|
38
38
|
assert_equal(Token.new("ADDRESS", 39, 46), t.next())
|
39
39
|
assert(! t.next())
|
40
40
|
end
|
41
|
-
end if (/utf-8/i
|
41
|
+
end if (/utf-8/i =~ Ferret.locale)
|
42
42
|
|
43
43
|
class AsciiLetterAnalyzerTest < Test::Unit::TestCase
|
44
44
|
include Ferret::Analysis
|
@@ -131,7 +131,7 @@ class LetterAnalyzerTest < Test::Unit::TestCase
|
|
131
131
|
assert_equal(Token.new("öîí", 80, 86), t.next)
|
132
132
|
assert(! t.next())
|
133
133
|
end
|
134
|
-
end if (/utf-8/i
|
134
|
+
end if (/utf-8/i =~ Ferret.locale)
|
135
135
|
|
136
136
|
class AsciiWhiteSpaceAnalyzerTest < Test::Unit::TestCase
|
137
137
|
include Ferret::Analysis
|
@@ -214,7 +214,7 @@ class WhiteSpaceAnalyzerTest < Test::Unit::TestCase
|
|
214
214
|
assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
|
215
215
|
assert(! t.next())
|
216
216
|
end
|
217
|
-
end if (/utf-8/i
|
217
|
+
end if (/utf-8/i =~ Ferret.locale)
|
218
218
|
|
219
219
|
class AsciiStandardAnalyzerTest < Test::Unit::TestCase
|
220
220
|
include Ferret::Analysis
|
@@ -350,7 +350,7 @@ class StandardAnalyzerTest < Test::Unit::TestCase
|
|
350
350
|
assert_equal(Token.new('öîí', 142, 148), t2.next)
|
351
351
|
assert(! t2.next())
|
352
352
|
end
|
353
|
-
end if (/utf-8/i
|
353
|
+
end if (/utf-8/i =~ Ferret.locale)
|
354
354
|
|
355
355
|
class PerFieldAnalyzerTest < Test::Unit::TestCase
|
356
356
|
include Ferret::Analysis
|
@@ -545,4 +545,4 @@ class CustomAnalyzerTest < Test::Unit::TestCase
|
|
545
545
|
assert_equal(Token.new("dêbater", 36, 44), t.next)
|
546
546
|
assert(! t.next())
|
547
547
|
end
|
548
|
-
end if (/utf-8/i
|
548
|
+
end if (/utf-8/i =~ Ferret.locale)
|
@@ -109,7 +109,7 @@ class LetterTokenizerTest < Test::Unit::TestCase
|
|
109
109
|
assert_equal(Token.new('öîí', 80, 86), t.next)
|
110
110
|
assert(! t.next())
|
111
111
|
end
|
112
|
-
end if (/utf-8/i
|
112
|
+
end if (/utf-8/i =~ Ferret.locale)
|
113
113
|
|
114
114
|
class AsciiWhiteSpaceTokenizerTest < Test::Unit::TestCase
|
115
115
|
include Ferret::Analysis
|
@@ -186,7 +186,7 @@ class WhiteSpaceTokenizerTest < Test::Unit::TestCase
|
|
186
186
|
assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
|
187
187
|
assert(! t.next())
|
188
188
|
end
|
189
|
-
end if (/utf-8/i
|
189
|
+
end if (/utf-8/i =~ Ferret.locale)
|
190
190
|
|
191
191
|
class AsciiStandardTokenizerTest < Test::Unit::TestCase
|
192
192
|
include Ferret::Analysis
|
@@ -275,7 +275,7 @@ class StandardTokenizerTest < Test::Unit::TestCase
|
|
275
275
|
assert_equal(Token.new('www.davebalmain.com/trac-site', 25, 61), t.next)
|
276
276
|
assert(! t.next())
|
277
277
|
end
|
278
|
-
end if (/utf-8/i
|
278
|
+
end if (/utf-8/i =~ Ferret.locale)
|
279
279
|
|
280
280
|
class RegExpTokenizerTest < Test::Unit::TestCase
|
281
281
|
include Ferret::Analysis
|
@@ -428,7 +428,7 @@ END
|
|
428
428
|
assert_equal(Token.new('szzzt', 256, 264), t.next)
|
429
429
|
assert(! t.next())
|
430
430
|
end
|
431
|
-
end if (/utf-8/i
|
431
|
+
end if (/utf-8/i =~ Ferret.locale)
|
432
432
|
|
433
433
|
class StopFilterTest < Test::Unit::TestCase
|
434
434
|
include Ferret::Analysis
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -150,7 +150,7 @@ class IndexTest < Test::Unit::TestCase
|
|
150
150
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
151
151
|
|
152
152
|
Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
|
153
|
-
assert_raise(FileNotFoundError) do
|
153
|
+
assert_raise(Ferret::FileNotFoundError) do
|
154
154
|
Index.new(:path => fs_path,
|
155
155
|
:create_if_missing => false,
|
156
156
|
:default_field => :xxx)
|
@@ -31,6 +31,11 @@ module IndexReaderCommon
|
|
31
31
|
def do_test_term_enum()
|
32
32
|
te = @ir.terms(:author)
|
33
33
|
|
34
|
+
assert_equal('[{"term":"Leo","frequency":1},{"term":"Tolstoy","frequency":1}]', te.to_json);
|
35
|
+
te.field = :author
|
36
|
+
assert_equal('[["Leo",1],["Tolstoy",1]]', te.to_json(:fast));
|
37
|
+
te.field = :author
|
38
|
+
|
34
39
|
assert(te.next?)
|
35
40
|
assert_equal("Leo", te.term)
|
36
41
|
assert_equal(1, te.doc_freq)
|
@@ -99,6 +104,11 @@ module IndexReaderCommon
|
|
99
104
|
end
|
100
105
|
assert(! tde.next?)
|
101
106
|
|
107
|
+
tde = @ir.term_docs_for(:body, "Wally")
|
108
|
+
assert_equal('[{"document":0,"frequency":1},{"document":5,"frequency":1},{"document":18,"frequency":3},{"document":20,"frequency":6}]', tde.to_json)
|
109
|
+
tde = @ir.term_docs_for(:body, "Wally")
|
110
|
+
assert_equal('[[0,1],[5,1],[18,3],[20,6]]', tde.to_json(:fast))
|
111
|
+
|
102
112
|
do_test_term_docpos_enum_skip_to(tde)
|
103
113
|
|
104
114
|
# test term positions
|
@@ -124,6 +134,33 @@ module IndexReaderCommon
|
|
124
134
|
assert_nil(tde.next_position())
|
125
135
|
assert(! tde.next?)
|
126
136
|
|
137
|
+
tde = @ir.term_positions_for(:body, "read")
|
138
|
+
assert_equal('[' +
|
139
|
+
'{"document":1,"frequency":1,"positions":[3]},' +
|
140
|
+
'{"document":2,"frequency":2,"positions":[1,4]},' +
|
141
|
+
'{"document":6,"frequency":4,"positions":[3,4,5,6]},' +
|
142
|
+
'{"document":9,"frequency":3,"positions":[0,4,13]},' +
|
143
|
+
'{"document":10,"frequency":1,"positions":[1]},' +
|
144
|
+
'{"document":16,"frequency":2,"positions":[2,3]},' +
|
145
|
+
'{"document":17,"frequency":1,"positions":[2]},' +
|
146
|
+
'{"document":20,"frequency":1,"positions":[21]},' +
|
147
|
+
'{"document":21,"frequency":6,"positions":[3,4,5,8,9,10]}]',
|
148
|
+
tde.to_json())
|
149
|
+
tde = @ir.term_positions_for(:body, "read")
|
150
|
+
assert_equal('[' +
|
151
|
+
'[1,1,[3]],' +
|
152
|
+
'[2,2,[1,4]],' +
|
153
|
+
'[6,4,[3,4,5,6]],' +
|
154
|
+
'[9,3,[0,4,13]],' +
|
155
|
+
'[10,1,[1]],' +
|
156
|
+
'[16,2,[2,3]],' +
|
157
|
+
'[17,1,[2]],' +
|
158
|
+
'[20,1,[21]],' +
|
159
|
+
'[21,6,[3,4,5,8,9,10]]]',
|
160
|
+
tde.to_json(:fast))
|
161
|
+
|
162
|
+
tde = @ir.term_positions_for(:body, "read")
|
163
|
+
|
127
164
|
do_test_term_docpos_enum_skip_to(tde)
|
128
165
|
end
|
129
166
|
|
@@ -68,7 +68,7 @@ class SpansBasicTest < Test::Unit::TestCase
|
|
68
68
|
end
|
69
69
|
|
70
70
|
def check_hits(query, expected, test_explain = false, top=nil)
|
71
|
-
top_docs = @searcher.search(query, {:limit => expected.length})
|
71
|
+
top_docs = @searcher.search(query, {:limit => expected.length + 1})
|
72
72
|
assert_equal(expected.length, top_docs.hits.size)
|
73
73
|
assert_equal(top, top_docs.hits[0].doc) if top
|
74
74
|
assert_equal(expected.length, top_docs.total_hits)
|
@@ -170,4 +170,21 @@ class SpansBasicTest < Test::Unit::TestCase
|
|
170
170
|
q = SpanOrQuery.new([nearq1, nearq2])
|
171
171
|
check_hits(q, [0,3,4,5,6,8,9,10,11,14,16,30], false)
|
172
172
|
end
|
173
|
+
|
174
|
+
def test_span_prefix_query_max_terms
|
175
|
+
@dir = RAMDirectory.new
|
176
|
+
iw = IndexWriter.new(:dir => @dir,
|
177
|
+
:analyzer => WhiteSpaceAnalyzer.new())
|
178
|
+
2000.times { |i| iw << {:field => "prefix#{i} term#{i}"} }
|
179
|
+
iw.close()
|
180
|
+
@searcher = Searcher.new(@dir)
|
181
|
+
|
182
|
+
pq = SpanPrefixQuery.new(:field, "prefix")
|
183
|
+
tq = SpanTermQuery.new(:field, "term1500")
|
184
|
+
q = SpanNearQuery.new(:clauses => [pq, tq], :in_order => true)
|
185
|
+
check_hits(q, [], false)
|
186
|
+
pq = SpanPrefixQuery.new(:field, "prefix", 2000)
|
187
|
+
q = SpanNearQuery.new(:clauses => [pq, tq], :in_order => true)
|
188
|
+
check_hits(q, [1500], false)
|
189
|
+
end
|
173
190
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.11.
|
7
|
-
date: 2007-
|
6
|
+
version: 0.11.4
|
7
|
+
date: 2007-04-07 00:00:00 +10:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -13,7 +13,7 @@ homepage: http://ferret.davebalmain.com/trac
|
|
13
13
|
rubyforge_project: ferret
|
14
14
|
description: Ferret is a port of the Java Lucene project. It is a powerful indexing and search library.
|
15
15
|
autorequire: ferret
|
16
|
-
default_executable:
|
16
|
+
default_executable: ferret-browser
|
17
17
|
bindir: bin
|
18
18
|
has_rdoc: true
|
19
19
|
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
@@ -168,7 +168,20 @@ files:
|
|
168
168
|
- lib/ferret/field_infos.rb
|
169
169
|
- lib/ferret/document.rb
|
170
170
|
- lib/ferret/index.rb
|
171
|
+
- lib/ferret/browser.rb
|
171
172
|
- lib/ferret/number_tools.rb
|
173
|
+
- lib/ferret/browser/webrick.rb
|
174
|
+
- lib/ferret/browser/views/layout.rhtml
|
175
|
+
- lib/ferret/browser/views/term/index.rhtml
|
176
|
+
- lib/ferret/browser/views/term/termdocs.rhtml
|
177
|
+
- lib/ferret/browser/views/home/index.rhtml
|
178
|
+
- lib/ferret/browser/views/help/index.rhtml
|
179
|
+
- lib/ferret/browser/views/document/list.rhtml
|
180
|
+
- lib/ferret/browser/views/document/show.rhtml
|
181
|
+
- lib/ferret/browser/views/error/index.rhtml
|
182
|
+
- lib/ferret/browser/views/term-vector/index.rhtml
|
183
|
+
- lib/ferret/browser/s/style.css
|
184
|
+
- lib/ferret/browser/s/global.js
|
172
185
|
- test/test_helper.rb
|
173
186
|
- test/test_all.rb
|
174
187
|
- test/unit/tc_document.rb
|
@@ -227,8 +240,8 @@ extra_rdoc_files:
|
|
227
240
|
- ext/r_index.c
|
228
241
|
- ext/r_store.c
|
229
242
|
- ext/ferret.c
|
230
|
-
executables:
|
231
|
-
|
243
|
+
executables:
|
244
|
+
- ferret-browser
|
232
245
|
extensions:
|
233
246
|
- ext/extconf.rb
|
234
247
|
requirements: []
|