ferret 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ferret.rb CHANGED
@@ -22,7 +22,7 @@
22
22
  #++
23
23
  # :include: ../TUTORIAL
24
24
  module Ferret
25
- VERSION = '0.2.0'
25
+ VERSION = '0.2.1'
26
26
  end
27
27
 
28
28
  require 'ferret/utils'
@@ -68,6 +68,14 @@ module Ferret::Index
68
68
  # The default is true.
69
69
  # default_slop:: Set the default slop for phrase queries. This
70
70
  # defaults to 0.
71
+ # key:: Expert: This should only be used if you really
72
+ # know what you are doing. Basically you can set a
73
+ # field or an array of fields to be the key for the
74
+ # index. So if you add a document with the same key
75
+ # as an existing document, the existing document will
76
+ # be replaced by the new object. This will slow
77
+ # down indexing so it should not be used if
78
+ # performance is a concern.
71
79
  #
72
80
  # Some examples;
73
81
  #
@@ -86,6 +94,7 @@ module Ferret::Index
86
94
  options[:default_search_field] &&= options[:default_search_field].to_s
87
95
  options[:default_field] &&= options[:default_field].to_s
88
96
  options[:create_if_missing] = true if options[:create_if_missing].nil?
97
+ @key = [options[:key]].flatten if options[:key]
89
98
 
90
99
  if options[:path]
91
100
  @dir = FSDirectory.new(options[:path], options[:create])
@@ -197,7 +206,6 @@ module Ferret::Index
197
206
  # vectors with the data. Read more about this in Ferret::Document::Field.
198
207
  def add_document(doc, analyzer = nil)
199
208
  @dir.synchronize do
200
- ensure_writer_open()
201
209
  fdoc = nil
202
210
  if doc.is_a?(String)
203
211
  fdoc = Document.new
@@ -220,8 +228,15 @@ module Ferret::Index
220
228
  else
221
229
  raise ArgumentError, "Unknown document type #{doc.class}"
222
230
  end
223
- @has_writes = true
224
231
 
232
+ # delete existing documents with the same key
233
+ if @key
234
+ query = @key.map {|field| "+#{field}:#{fdoc[field]}" }.join(" ")
235
+ query_delete(query)
236
+ end
237
+
238
+ ensure_writer_open()
239
+ @has_writes = true
225
240
  @writer.add_document(fdoc, analyzer || @writer.analyzer)
226
241
  end
227
242
  end
@@ -335,7 +350,8 @@ module Ferret::Index
335
350
  # new_val:: The values we are updating. This can be a string in which case
336
351
  # the default field is updated, or it can be a hash, in which
337
352
  # case, all fields in the hash are updated. You can also pass a
338
- # full Document object but you must pass the doc_num as the id.
353
+ # full Document object, which will completely replace the
354
+ # documents you remove.
339
355
  def update(id, new_val)
340
356
  @dir.synchronize do
341
357
  if id.is_a?(String)
@@ -368,8 +384,11 @@ module Ferret::Index
368
384
  # parser) or an actual query object.
369
385
  # new_val:: The values we are updating. This can be a string in which case
370
386
  # the default field is updated, or it can be a hash, in which
371
- # case, all fields in the hash are updated. If you want to pass
372
- # a full document see #update.
387
+ # case, all fields in the hash are updated. You can also pass a
388
+ # full Document object, which will completely replace the
389
+ # documents you remove. You should be careful when passing a
390
+ # whole document to be sure that your query will return one and
391
+ # only one result.
373
392
  def query_update(query, new_val)
374
393
  @dir.synchronize do
375
394
  ensure_searcher_open()
@@ -379,6 +398,8 @@ module Ferret::Index
379
398
  document = doc(id)
380
399
  if new_val.is_a?(Hash)
381
400
  new_val.each_pair {|name, content| document[name] = content.to_s}
401
+ elsif new_val.is_a?(Document)
402
+ document = new_val
382
403
  else
383
404
  document[@options[:default_field]] = new_val.to_s
384
405
  end
@@ -391,4 +391,41 @@ class IndexTest < Test::Unit::TestCase
391
391
  assert_equal("cool", index["4"][:tag])
392
392
  assert_equal(4, index.search("tag:cool").size)
393
393
  end
394
+
395
+ def test_index_key
396
+ data = [
397
+ {:id => 0, :val => "one"},
398
+ {:id => 0, :val => "two"},
399
+ {:id => 1, :val => "three"},
400
+ {:id => 1, :val => "four"},
401
+ ]
402
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
403
+ :key => "id")
404
+ data.each { |doc| index << doc }
405
+ assert_equal(2, index.size)
406
+ assert_equal("two", index[0][:val])
407
+ assert_equal("four", index[1][:val])
408
+ end
409
+
410
+ def test_index_multi_key
411
+ data = [
412
+ {:id => 0, :table => "product", :product => "tent"},
413
+ {:id => 0, :table => "location", :location => "first floor"},
414
+ {:id => 0, :table => "product", :product => "super tent"},
415
+ {:id => 0, :table => "location", :location => "second floor"},
416
+ {:id => 1, :table => "product", :product => "backback"},
417
+ {:id => 1, :table => "location", :location => "second floor"},
418
+ {:id => 1, :table => "location", :location => "first floor"},
419
+ {:id => 1, :table => "product", :product => "rucksack"},
420
+ {:id => 1, :table => "product", :product => "backpack"}
421
+ ]
422
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
423
+ :key => ["id", "table"])
424
+ data.each { |doc| index << doc }
425
+ assert_equal(4, index.size)
426
+ assert_equal("super tent", index[0][:product])
427
+ assert_equal("second floor", index[1][:location])
428
+ assert_equal("backpack", index[3][:product])
429
+ assert_equal("first floor", index[2][:location])
430
+ end
394
431
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.2.0
7
- date: 2005-11-12 00:00:00 +09:00
6
+ version: 0.2.1
7
+ date: 2005-11-14 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib