ferret 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ferret.rb CHANGED
@@ -22,7 +22,7 @@
22
22
  #++
23
23
  # :include: ../TUTORIAL
24
24
  module Ferret
25
- VERSION = '0.2.0'
25
+ VERSION = '0.2.1'
26
26
  end
27
27
 
28
28
  require 'ferret/utils'
@@ -68,6 +68,14 @@ module Ferret::Index
68
68
  # The default is true.
69
69
  # default_slop:: Set the default slop for phrase queries. This
70
70
  # defaults to 0.
71
+ # key:: Expert: This should only be used if you really
72
+ # know what you are doing. Basically you can set a
73
+ # field or an array of fields to be the key for the
74
+ # index. So if you add a document with the same key
75
+ # as an existing document, the existing document will
76
+ # be replaced by the new object. This will slow
77
+ # down indexing so it should not be used if
78
+ # performance is a concern.
71
79
  #
72
80
  # Some examples;
73
81
  #
@@ -86,6 +94,7 @@ module Ferret::Index
86
94
  options[:default_search_field] &&= options[:default_search_field].to_s
87
95
  options[:default_field] &&= options[:default_field].to_s
88
96
  options[:create_if_missing] = true if options[:create_if_missing].nil?
97
+ @key = [options[:key]].flatten if options[:key]
89
98
 
90
99
  if options[:path]
91
100
  @dir = FSDirectory.new(options[:path], options[:create])
@@ -197,7 +206,6 @@ module Ferret::Index
197
206
  # vectors with the data. Read more about this in Ferret::Document::Field.
198
207
  def add_document(doc, analyzer = nil)
199
208
  @dir.synchronize do
200
- ensure_writer_open()
201
209
  fdoc = nil
202
210
  if doc.is_a?(String)
203
211
  fdoc = Document.new
@@ -220,8 +228,15 @@ module Ferret::Index
220
228
  else
221
229
  raise ArgumentError, "Unknown document type #{doc.class}"
222
230
  end
223
- @has_writes = true
224
231
 
232
+ # delete existing documents with the same key
233
+ if @key
234
+ query = @key.map {|field| "+#{field}:#{fdoc[field]}" }.join(" ")
235
+ query_delete(query)
236
+ end
237
+
238
+ ensure_writer_open()
239
+ @has_writes = true
225
240
  @writer.add_document(fdoc, analyzer || @writer.analyzer)
226
241
  end
227
242
  end
@@ -335,7 +350,8 @@ module Ferret::Index
335
350
  # new_val:: The values we are updating. This can be a string in which case
336
351
  # the default field is updated, or it can be a hash, in which
337
352
  # case, all fields in the hash are updated. You can also pass a
338
- # full Document object but you must pass the doc_num as the id.
353
+ # full Document object, which will completely replace the
354
+ # documents you remove.
339
355
  def update(id, new_val)
340
356
  @dir.synchronize do
341
357
  if id.is_a?(String)
@@ -368,8 +384,11 @@ module Ferret::Index
368
384
  # parser) or an actual query object.
369
385
  # new_val:: The values we are updating. This can be a string in which case
370
386
  # the default field is updated, or it can be a hash, in which
371
- # case, all fields in the hash are updated. If you want to pass
372
- # a full document see #update.
387
+ # case, all fields in the hash are updated. You can also pass a
388
+ # full Document object, which will completely replace the
389
+ # documents you remove. You should be careful when passing a
390
+ # whole document to be sure that your query will return one and
391
+ # only one result.
373
392
  def query_update(query, new_val)
374
393
  @dir.synchronize do
375
394
  ensure_searcher_open()
@@ -379,6 +398,8 @@ module Ferret::Index
379
398
  document = doc(id)
380
399
  if new_val.is_a?(Hash)
381
400
  new_val.each_pair {|name, content| document[name] = content.to_s}
401
+ elsif new_val.is_a?(Document)
402
+ document = new_val
382
403
  else
383
404
  document[@options[:default_field]] = new_val.to_s
384
405
  end
@@ -391,4 +391,41 @@ class IndexTest < Test::Unit::TestCase
391
391
  assert_equal("cool", index["4"][:tag])
392
392
  assert_equal(4, index.search("tag:cool").size)
393
393
  end
394
+
395
+ def test_index_key
396
+ data = [
397
+ {:id => 0, :val => "one"},
398
+ {:id => 0, :val => "two"},
399
+ {:id => 1, :val => "three"},
400
+ {:id => 1, :val => "four"},
401
+ ]
402
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
403
+ :key => "id")
404
+ data.each { |doc| index << doc }
405
+ assert_equal(2, index.size)
406
+ assert_equal("two", index[0][:val])
407
+ assert_equal("four", index[1][:val])
408
+ end
409
+
410
+ def test_index_multi_key
411
+ data = [
412
+ {:id => 0, :table => "product", :product => "tent"},
413
+ {:id => 0, :table => "location", :location => "first floor"},
414
+ {:id => 0, :table => "product", :product => "super tent"},
415
+ {:id => 0, :table => "location", :location => "second floor"},
416
+ {:id => 1, :table => "product", :product => "backback"},
417
+ {:id => 1, :table => "location", :location => "second floor"},
418
+ {:id => 1, :table => "location", :location => "first floor"},
419
+ {:id => 1, :table => "product", :product => "rucksack"},
420
+ {:id => 1, :table => "product", :product => "backpack"}
421
+ ]
422
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
423
+ :key => ["id", "table"])
424
+ data.each { |doc| index << doc }
425
+ assert_equal(4, index.size)
426
+ assert_equal("super tent", index[0][:product])
427
+ assert_equal("second floor", index[1][:location])
428
+ assert_equal("backpack", index[3][:product])
429
+ assert_equal("first floor", index[2][:location])
430
+ end
394
431
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.2.0
7
- date: 2005-11-12 00:00:00 +09:00
6
+ version: 0.2.1
7
+ date: 2005-11-14 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib