ferret 0.11.3 → 0.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ <%= @reader.term_positions_for(@field, @term).to_json(:fast) %>
@@ -0,0 +1,14 @@
1
+ module WEBrick
2
+ class FerretBrowserHandler < WEBrick::HTTPServlet::AbstractServlet
3
+ # Creates a FerretBrowserHandler, which answers requests by delegating
4
+ # to a Ferret::Browser::Delegator built from +reader+ and +path+.
5
+ def initialize(server, reader, path)
6
+ super(server)
7
+ @delegator = Ferret::Browser::Delegator.new(reader, path)
8
+ end
9
+ # Handler for WEBrick GET requests.
10
+ def do_GET(req, res)
11
+ res.status, res.content_type, res.body = @delegator.run(req.meta_vars)
12
+ end
13
+ end
14
+ end
data/lib/ferret/index.rb CHANGED
@@ -387,40 +387,54 @@ module Ferret::Index
387
387
  end
388
388
  end
389
389
 
390
- # Retrieve the document referenced by the document number +id+, if id is
391
- # an integer or the first document with term +id+ if +id+ is a term.
390
+ # Retrieves a document/documents from the index. The method for retrieval
391
+ # depends on the type of the argument passed.
392
392
  #
393
- # id:: The number of the document to retrieve, or the term used as the :id
394
- # for the document we wish to retrieve
395
- def doc(*args)
393
+ # If +arg+ is an Integer then return the document based on the internal
394
+ # document number.
395
+ #
396
+ # If +arg+ is a Range, then return the documents within the range based on
397
+ # internal document number.
398
+ #
399
+ # If +arg+ is a String or Symbol then search for the first document with +arg+ in
400
+ # the +id+ field. The +id+ field is either :id or whatever you set the
401
+ # :id_field parameter to when you create the Index object.
402
+ def doc(*arg)
396
403
  @dir.synchronize do
397
- ensure_reader_open()
398
- id = args[0]
404
+ id = arg[0]
399
405
  if id.kind_of?(String) or id.kind_of?(Symbol)
406
+ ensure_reader_open()
400
407
  term_doc_enum = @reader.term_docs_for(@id_field, id.to_s)
401
408
  return term_doc_enum.next? ? @reader[term_doc_enum.doc] : nil
409
+ else
410
+ ensure_reader_open(false)
411
+ return @reader[*arg]
402
412
  end
403
- return @reader[*args]
404
413
  end
405
414
  end
406
415
  alias :[] :doc
407
416
 
408
- # Delete the document referenced by the document number +id+ if +id+ is an
409
- # integer or all of the documents which have the term +id+ if +id+ is a
410
- # term..
417
+ # Deletes a document/documents from the index. The method for determining
418
+ # the document to delete depends on the type of the argument passed.
419
+ #
420
+ # If +arg+ is an Integer then delete the document based on the internal
421
+ # document number. Will raise an error if the document does not exist.
411
422
  #
412
- # id:: The number of the document to delete
413
- def delete(id)
423
+ # If +arg+ is a String or Symbol then search for the documents with +arg+ in the
424
+ # +id+ field. The +id+ field is either :id or whatever you set the :id_field
425
+ # parameter to when you create the Index object. Will fail quietly if
426
+ # no document exists.
427
+ def delete(arg)
414
428
  @dir.synchrolock do
415
429
  ensure_writer_open()
416
- if id.is_a?(String) or id.is_a?(Symbol)
430
+ if arg.is_a?(String) or arg.is_a?(Symbol)
417
431
  ensure_writer_open()
418
- @writer.delete(@id_field, id.to_s)
419
- elsif id.is_a?(Integer)
432
+ @writer.delete(@id_field, arg.to_s)
433
+ elsif arg.is_a?(Integer)
420
434
  ensure_reader_open()
421
- cnt = @reader.delete(id)
435
+ cnt = @reader.delete(arg)
422
436
  else
423
- raise ArgumentError, "Cannot delete for id of type #{id.class}"
437
+ raise ArgumentError, "Cannot delete for arg of type #{arg.class}"
424
438
  end
425
439
  flush() if @auto_flush
426
440
  end
@@ -537,12 +551,15 @@ module Ferret::Index
537
551
  # index.
538
552
  def flush()
539
553
  @dir.synchronize do
540
- @searcher.close if @searcher
541
- @reader.close if @reader
542
- @writer.close if @writer
543
- @reader = nil
544
- @writer = nil
545
- @searcher = nil
554
+ if @reader
555
+ if @searcher
556
+ @searcher.close
557
+ @searcher = nil
558
+ end
559
+ @reader.commit
560
+ elsif @writer
561
+ @writer.commit
562
+ end
546
563
  end
547
564
  end
548
565
  alias :commit :flush
@@ -614,7 +631,7 @@ module Ferret::Index
614
631
  # false.
615
632
  def persist(directory, create = true)
616
633
  synchronize do
617
- flush()
634
+ close_all()
618
635
  old_dir = @dir
619
636
  if directory.is_a?(String)
620
637
  @dir = FSDirectory.new(directory, create)
@@ -684,19 +701,22 @@ module Ferret::Index
684
701
  end
685
702
 
686
703
  # returns the new reader if one is opened
687
- def ensure_reader_open()
704
+ def ensure_reader_open(get_latest = true)
688
705
  raise "tried to use a closed index" if not @open
689
706
  if @reader
690
- latest = false
691
- begin
692
- latest = @reader.latest?
693
- rescue Lock::LockError => le
694
- sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
695
- latest = @reader.latest?
696
- end
697
- if not latest
698
- @reader.close
699
- return @reader = IndexReader.new(@dir)
707
+ if get_latest
708
+ latest = false
709
+ begin
710
+ latest = @reader.latest?
711
+ rescue Lock::LockError => le
712
+ sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
713
+ latest = @reader.latest?
714
+ end
715
+ if not latest
716
+ @searcher.close if @searcher
717
+ @reader.close
718
+ return @reader = IndexReader.new(@dir)
719
+ end
700
720
  end
701
721
  else
702
722
  if @writer
@@ -737,6 +757,17 @@ module Ferret::Index
737
757
 
738
758
  return @searcher.search(query, options)
739
759
  end
760
+
761
+ def close_all()
762
+ @dir.synchronize do
763
+ @searcher.close if @searcher
764
+ @reader.close if @reader
765
+ @writer.close if @writer
766
+ @reader = nil
767
+ @searcher = nil
768
+ @writer = nil
769
+ end
770
+ end
740
771
  end
741
772
  end
742
773
 
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.11.3'
2
+ VERSION = '0.11.4'
3
3
  end
@@ -38,7 +38,7 @@ class AnalyzerTest < Test::Unit::TestCase
38
38
  assert_equal(Token.new("ADDRESS", 39, 46), t.next())
39
39
  assert(! t.next())
40
40
  end
41
- end if (/utf-8/i !~ Ferret.locale)
41
+ end if (/utf-8/i =~ Ferret.locale)
42
42
 
43
43
  class AsciiLetterAnalyzerTest < Test::Unit::TestCase
44
44
  include Ferret::Analysis
@@ -131,7 +131,7 @@ class LetterAnalyzerTest < Test::Unit::TestCase
131
131
  assert_equal(Token.new("öîí", 80, 86), t.next)
132
132
  assert(! t.next())
133
133
  end
134
- end if (/utf-8/i !~ Ferret.locale)
134
+ end if (/utf-8/i =~ Ferret.locale)
135
135
 
136
136
  class AsciiWhiteSpaceAnalyzerTest < Test::Unit::TestCase
137
137
  include Ferret::Analysis
@@ -214,7 +214,7 @@ class WhiteSpaceAnalyzerTest < Test::Unit::TestCase
214
214
  assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
215
215
  assert(! t.next())
216
216
  end
217
- end if (/utf-8/i !~ Ferret.locale)
217
+ end if (/utf-8/i =~ Ferret.locale)
218
218
 
219
219
  class AsciiStandardAnalyzerTest < Test::Unit::TestCase
220
220
  include Ferret::Analysis
@@ -350,7 +350,7 @@ class StandardAnalyzerTest < Test::Unit::TestCase
350
350
  assert_equal(Token.new('öîí', 142, 148), t2.next)
351
351
  assert(! t2.next())
352
352
  end
353
- end if (/utf-8/i !~ Ferret.locale)
353
+ end if (/utf-8/i =~ Ferret.locale)
354
354
 
355
355
  class PerFieldAnalyzerTest < Test::Unit::TestCase
356
356
  include Ferret::Analysis
@@ -545,4 +545,4 @@ class CustomAnalyzerTest < Test::Unit::TestCase
545
545
  assert_equal(Token.new("dêbater", 36, 44), t.next)
546
546
  assert(! t.next())
547
547
  end
548
- end if (/utf-8/i !~ Ferret.locale)
548
+ end if (/utf-8/i =~ Ferret.locale)
@@ -109,7 +109,7 @@ class LetterTokenizerTest < Test::Unit::TestCase
109
109
  assert_equal(Token.new('öîí', 80, 86), t.next)
110
110
  assert(! t.next())
111
111
  end
112
- end if (/utf-8/i !~ Ferret.locale)
112
+ end if (/utf-8/i =~ Ferret.locale)
113
113
 
114
114
  class AsciiWhiteSpaceTokenizerTest < Test::Unit::TestCase
115
115
  include Ferret::Analysis
@@ -186,7 +186,7 @@ class WhiteSpaceTokenizerTest < Test::Unit::TestCase
186
186
  assert_equal(Token.new('áägç®êëì¯úøã¬öîí', 55, 86), t.next)
187
187
  assert(! t.next())
188
188
  end
189
- end if (/utf-8/i !~ Ferret.locale)
189
+ end if (/utf-8/i =~ Ferret.locale)
190
190
 
191
191
  class AsciiStandardTokenizerTest < Test::Unit::TestCase
192
192
  include Ferret::Analysis
@@ -275,7 +275,7 @@ class StandardTokenizerTest < Test::Unit::TestCase
275
275
  assert_equal(Token.new('www.davebalmain.com/trac-site', 25, 61), t.next)
276
276
  assert(! t.next())
277
277
  end
278
- end if (/utf-8/i !~ Ferret.locale)
278
+ end if (/utf-8/i =~ Ferret.locale)
279
279
 
280
280
  class RegExpTokenizerTest < Test::Unit::TestCase
281
281
  include Ferret::Analysis
@@ -428,7 +428,7 @@ END
428
428
  assert_equal(Token.new('szzzt', 256, 264), t.next)
429
429
  assert(! t.next())
430
430
  end
431
- end if (/utf-8/i !~ Ferret.locale)
431
+ end if (/utf-8/i =~ Ferret.locale)
432
432
 
433
433
  class StopFilterTest < Test::Unit::TestCase
434
434
  include Ferret::Analysis
@@ -150,7 +150,7 @@ class IndexTest < Test::Unit::TestCase
150
150
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
151
151
 
152
152
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
153
- assert_raise(FileNotFoundError) do
153
+ assert_raise(Ferret::FileNotFoundError) do
154
154
  Index.new(:path => fs_path,
155
155
  :create_if_missing => false,
156
156
  :default_field => :xxx)
@@ -31,6 +31,11 @@ module IndexReaderCommon
31
31
  def do_test_term_enum()
32
32
  te = @ir.terms(:author)
33
33
 
34
+ assert_equal('[{"term":"Leo","frequency":1},{"term":"Tolstoy","frequency":1}]', te.to_json);
35
+ te.field = :author
36
+ assert_equal('[["Leo",1],["Tolstoy",1]]', te.to_json(:fast));
37
+ te.field = :author
38
+
34
39
  assert(te.next?)
35
40
  assert_equal("Leo", te.term)
36
41
  assert_equal(1, te.doc_freq)
@@ -99,6 +104,11 @@ module IndexReaderCommon
99
104
  end
100
105
  assert(! tde.next?)
101
106
 
107
+ tde = @ir.term_docs_for(:body, "Wally")
108
+ assert_equal('[{"document":0,"frequency":1},{"document":5,"frequency":1},{"document":18,"frequency":3},{"document":20,"frequency":6}]', tde.to_json)
109
+ tde = @ir.term_docs_for(:body, "Wally")
110
+ assert_equal('[[0,1],[5,1],[18,3],[20,6]]', tde.to_json(:fast))
111
+
102
112
  do_test_term_docpos_enum_skip_to(tde)
103
113
 
104
114
  # test term positions
@@ -124,6 +134,33 @@ module IndexReaderCommon
124
134
  assert_nil(tde.next_position())
125
135
  assert(! tde.next?)
126
136
 
137
+ tde = @ir.term_positions_for(:body, "read")
138
+ assert_equal('[' +
139
+ '{"document":1,"frequency":1,"positions":[3]},' +
140
+ '{"document":2,"frequency":2,"positions":[1,4]},' +
141
+ '{"document":6,"frequency":4,"positions":[3,4,5,6]},' +
142
+ '{"document":9,"frequency":3,"positions":[0,4,13]},' +
143
+ '{"document":10,"frequency":1,"positions":[1]},' +
144
+ '{"document":16,"frequency":2,"positions":[2,3]},' +
145
+ '{"document":17,"frequency":1,"positions":[2]},' +
146
+ '{"document":20,"frequency":1,"positions":[21]},' +
147
+ '{"document":21,"frequency":6,"positions":[3,4,5,8,9,10]}]',
148
+ tde.to_json())
149
+ tde = @ir.term_positions_for(:body, "read")
150
+ assert_equal('[' +
151
+ '[1,1,[3]],' +
152
+ '[2,2,[1,4]],' +
153
+ '[6,4,[3,4,5,6]],' +
154
+ '[9,3,[0,4,13]],' +
155
+ '[10,1,[1]],' +
156
+ '[16,2,[2,3]],' +
157
+ '[17,1,[2]],' +
158
+ '[20,1,[21]],' +
159
+ '[21,6,[3,4,5,8,9,10]]]',
160
+ tde.to_json(:fast))
161
+
162
+ tde = @ir.term_positions_for(:body, "read")
163
+
127
164
  do_test_term_docpos_enum_skip_to(tde)
128
165
  end
129
166
 
@@ -68,7 +68,7 @@ class SpansBasicTest < Test::Unit::TestCase
68
68
  end
69
69
 
70
70
  def check_hits(query, expected, test_explain = false, top=nil)
71
- top_docs = @searcher.search(query, {:limit => expected.length})
71
+ top_docs = @searcher.search(query, {:limit => expected.length + 1})
72
72
  assert_equal(expected.length, top_docs.hits.size)
73
73
  assert_equal(top, top_docs.hits[0].doc) if top
74
74
  assert_equal(expected.length, top_docs.total_hits)
@@ -170,4 +170,21 @@ class SpansBasicTest < Test::Unit::TestCase
170
170
  q = SpanOrQuery.new([nearq1, nearq2])
171
171
  check_hits(q, [0,3,4,5,6,8,9,10,11,14,16,30], false)
172
172
  end
173
+
174
+ def test_span_prefix_query_max_terms
175
+ @dir = RAMDirectory.new
176
+ iw = IndexWriter.new(:dir => @dir,
177
+ :analyzer => WhiteSpaceAnalyzer.new())
178
+ 2000.times { |i| iw << {:field => "prefix#{i} term#{i}"} }
179
+ iw.close()
180
+ @searcher = Searcher.new(@dir)
181
+
182
+ pq = SpanPrefixQuery.new(:field, "prefix")
183
+ tq = SpanTermQuery.new(:field, "term1500")
184
+ q = SpanNearQuery.new(:clauses => [pq, tq], :in_order => true)
185
+ check_hits(q, [], false)
186
+ pq = SpanPrefixQuery.new(:field, "prefix", 2000)
187
+ q = SpanNearQuery.new(:clauses => [pq, tq], :in_order => true)
188
+ check_hits(q, [1500], false)
189
+ end
173
190
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.11.3
7
- date: 2007-03-03 00:00:00 +11:00
6
+ version: 0.11.4
7
+ date: 2007-04-07 00:00:00 +10:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib
@@ -13,7 +13,7 @@ homepage: http://ferret.davebalmain.com/trac
13
13
  rubyforge_project: ferret
14
14
  description: Ferret is a port of the Java Lucene project. It is a powerful indexing and search library.
15
15
  autorequire: ferret
16
- default_executable:
16
+ default_executable: ferret-browser
17
17
  bindir: bin
18
18
  has_rdoc: true
19
19
  required_ruby_version: !ruby/object:Gem::Version::Requirement
@@ -168,7 +168,20 @@ files:
168
168
  - lib/ferret/field_infos.rb
169
169
  - lib/ferret/document.rb
170
170
  - lib/ferret/index.rb
171
+ - lib/ferret/browser.rb
171
172
  - lib/ferret/number_tools.rb
173
+ - lib/ferret/browser/webrick.rb
174
+ - lib/ferret/browser/views/layout.rhtml
175
+ - lib/ferret/browser/views/term/index.rhtml
176
+ - lib/ferret/browser/views/term/termdocs.rhtml
177
+ - lib/ferret/browser/views/home/index.rhtml
178
+ - lib/ferret/browser/views/help/index.rhtml
179
+ - lib/ferret/browser/views/document/list.rhtml
180
+ - lib/ferret/browser/views/document/show.rhtml
181
+ - lib/ferret/browser/views/error/index.rhtml
182
+ - lib/ferret/browser/views/term-vector/index.rhtml
183
+ - lib/ferret/browser/s/style.css
184
+ - lib/ferret/browser/s/global.js
172
185
  - test/test_helper.rb
173
186
  - test/test_all.rb
174
187
  - test/unit/tc_document.rb
@@ -227,8 +240,8 @@ extra_rdoc_files:
227
240
  - ext/r_index.c
228
241
  - ext/r_store.c
229
242
  - ext/ferret.c
230
- executables: []
231
-
243
+ executables:
244
+ - ferret-browser
232
245
  extensions:
233
246
  - ext/extconf.rb
234
247
  requirements: []