stellr 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,7 @@
1
+ == 0.1.1 / 2008-08-06
2
+
3
+ * Add support for bulk indexing of multiple records
4
+
1
5
  == 0.1.0 / 2008-08-05
2
6
 
3
7
  * Initial release
data/README.txt CHANGED
@@ -87,7 +87,7 @@ Stellr is a Ferret based standalone search server featuring a DRB and (soon to c
87
87
 
88
88
  (The MIT License)
89
89
 
90
- Copyright (c) 2007 FIX
90
+ Copyright (c) 2008 Jens Kraemer, Benjamin Krause
91
91
 
92
92
  Permission is hereby granted, free of charge, to any person obtaining
93
93
  a copy of this software and associated documentation files (the
@@ -18,7 +18,7 @@ require 'stellr/search'
18
18
  $SAFE = 1
19
19
 
20
20
  module Stellr
21
- VERSION = '0.1.0'
21
+ VERSION = '0.1.1'
22
22
 
23
23
  def self.start_server( config )
24
24
  if config.script
@@ -6,12 +6,14 @@ module Stellr
6
6
  include Ferret::Index
7
7
  include Stellr::Utils::Shutdown
8
8
  include Stellr::Utils::Observable
9
- attr_reader :name
9
+ attr_reader :name, :logger
10
10
 
11
11
  def self.create( name, options )
12
+ log = (options[:logger] ||= (require 'logger'; Logger.new 'stellr.log'))
12
13
  collection_class = collection_class_for_options options
13
14
  collection = collection_class.new( name, options )
14
15
  if strategy_class = strategy_class_for_options( options )
16
+ log.debug "using strategy #{strategy_class}"
15
17
  strategy_class.new( collection, options )
16
18
  else
17
19
  collection
@@ -19,11 +21,11 @@ module Stellr
19
21
  end
20
22
 
21
23
  def initialize( name, options )
22
- @logger = options[:logger] || (require 'logger'; Logger.new 'stellr.log')
24
+ @logger = options[:logger]
23
25
  @name = name
24
26
  @options = options.dup
25
27
  end
26
-
28
+
27
29
  # called whenever the strategy thinks it's a good time to do something
28
30
  # timeconsuming (like switching indexes, optimizing, flushing, ...)
29
31
  def batch_finished
@@ -29,7 +29,9 @@ module Stellr
29
29
  end
30
30
 
31
31
  def sync_indexes
32
+ logger.debug "syncing #{searching_directory} to #{indexing_directory} ..."
32
33
  system("rsync -r --delete #{searching_directory}/ #{indexing_directory}")
34
+ logger.debug "done."
33
35
  end
34
36
 
35
37
  end
@@ -15,32 +15,39 @@ module Stellr
15
15
  #
16
16
  # Record may be a hash, or a Ferret::Document instance
17
17
  def add_record( record, boost = nil )
18
- raise ArgumentError.new("record must contain :id field") if record[:id].nil?
19
- if boost
20
- if Ferret::Document === record
21
- record.boost = boost
22
- else
23
- hash, record = record, Ferret::Document.new( boost )
24
- hash.each_pair do |k,v|
25
- record[k] = v
26
- end
27
- end
18
+ add_records [ [ record, boost ] ]
19
+ end
20
+ alias :<< :add_record
21
+
22
+ # adds multiple records at once
23
+ # records should be an array of hashes or of two-element arrays
24
+ # consisting of a hash and the record-specific boost value
25
+ def add_records(records)
26
+ return unless records.any?
27
+ records = if Hash === records.first
28
+ records.map{ |r| prepare_document r }
29
+ else
30
+ records.map{ |r, boost| prepare_document(r, boost) }
28
31
  end
29
32
  @writer_monitor.synchronize do
30
- @processed_records += 1
31
33
  w = writer
32
- w.delete :id, record[:id].to_s # ensure uniqueness by :id field
33
- w << record
34
+ records.each do |record|
35
+ @processed_records += 1
36
+ w.delete :id, record[:id].to_s # ensure uniqueness by :id field
37
+ w << record
38
+ end
39
+ w.commit
34
40
  end
35
41
  true
36
42
  end
37
- alias :<< :add_record
38
43
 
39
44
  def delete_record( record )
40
45
  raise ArgumentError.new("record must contain :id field") if record[:id].nil?
41
46
  @writer_monitor.synchronize do
47
+ w = writer
42
48
  @processed_records += 1
43
- writer.delete :id, record[:id].to_s
49
+ w.delete :id, record[:id].to_s
50
+ w.commit
44
51
  end
45
52
  true
46
53
  end
@@ -80,6 +87,21 @@ module Stellr
80
87
  end
81
88
 
82
89
  protected
90
+
91
+ def prepare_document(record, boost = nil)
92
+ raise ArgumentError.new("record must contain :id field") if record[:id].nil?
93
+ unless boost.nil?
94
+ if Ferret::Document === record
95
+ record.boost = boost
96
+ else
97
+ hash, record = record, Ferret::Document.new( boost )
98
+ hash.each_pair do |k,v|
99
+ record[k] = v
100
+ end
101
+ end
102
+ end
103
+ return record
104
+ end
83
105
 
84
106
  # should open a writer and return it
85
107
  def open_writer
@@ -68,8 +68,12 @@ module Stellr
68
68
  #
69
69
  #
70
70
  def register( name, options = {} )
71
+ raise "invalid collection name >#{name}<, may only contain a-zA-Z0-9_-" unless name =~ /^([a-zA-Z0-9_-]+)$/
72
+ name.untaint
71
73
  @collections.synchronize do
72
- @collections[name] ||= create_collection( name, options )
74
+ collection = (@collections[name] ||= create_collection( name, options ))
75
+ save_collection_config name, options unless options.nil? or options.empty?
76
+ collection
73
77
  end
74
78
  end
75
79
 
@@ -117,24 +121,19 @@ module Stellr
117
121
  # if nil is given for options, the method tries to locate a previously
118
122
  # saved collection configuration and restore from it.
119
123
  def create_collection( name, options )
120
- raise "invalid collection name >#{name}<, may only contain a-zA-Z0-9_-" unless name =~ /^([a-zA-Z0-9_-]+)$/
121
- name.untaint
122
- save_config = true
123
- if options.nil?
124
- options = load_collection_config name
125
- save_config = false
126
- end
124
+ options ||= load_collection_config name
127
125
  raise "No options given for collection #{name} and no stored configuration found." if options.nil?
128
126
 
129
127
  options[:path] = File.join( @config.data_dir, name )
130
- save_collection_config name, options if save_config
131
128
  return Collections::Base.create( name, {:logger => @logger}.merge(options) )
132
129
  end
133
130
 
131
+ # TODO move into collection?
134
132
  def save_collection_config( name, options )
135
133
  path = collection_config_path name
136
134
  ( File.open(path, 'w') << YAML.dump(options) ).close
137
135
  @logger.info "wrote collection config to #{path}"
136
+ @logger.debug "config is now:\n#{options.inspect}"
138
137
  end
139
138
 
140
139
  def load_collection_config( name )
@@ -4,7 +4,21 @@ module Stellr
4
4
 
5
5
  # Queueing strategy. Any index modifying methods return immediately, actions
6
6
  # are queued and executed asynchronously in order of arrival.
7
- #
7
+ #
8
+ # Unless you're using the static collection type, indexes will be switched
9
+ # whenever options[:max_batch_size] (which defaults to 200) is reached,
10
+ # and when the queue is empty.
11
+ #
12
+ # With static collections, manually calling switch is required, and this call
13
+ # will block until the switch is actually done.
14
+ #
15
+ # However this does not mean that
16
+ # all records from the queue have been processed, the switch may also occur
17
+ # between processing of add_record or add_records calls.
18
+ #
19
+ # FIXME fix this: switch should be an operation that is enqueued just like
20
+ # add_record so it occurs at the point in time the client desires.
21
+ # Is implicit switching really that useful? Definitely not with static collections...
8
22
  class Queueing < Base
9
23
 
10
24
  def initialize( collection, options )
@@ -18,6 +32,10 @@ module Stellr
18
32
  enqueue :add, [record, boost]
19
33
  end
20
34
 
35
+ def add_records(records)
36
+ enqueue :bulk_add, records
37
+ end
38
+
21
39
  def delete_record( record )
22
40
  enqueue :delete, record
23
41
  end
@@ -47,12 +65,15 @@ module Stellr
47
65
  end
48
66
 
49
67
  # process a single task from the queue
50
- def process_record( action, record )
68
+ # TODO refactoring: rename to process_task
69
+ def process_record( action, data )
51
70
  case action
52
71
  when :add
53
- @collection.add_record( *record )
72
+ @collection.add_record( *data )
73
+ when :bulk_add
74
+ @collection.add_records data
54
75
  when :delete
55
- @collection.delete_record record
76
+ @collection.delete_record data
56
77
  else
57
78
  raise "UnknownAction"
58
79
  end
@@ -10,7 +10,7 @@ class RSyncCollectionTest < StellrTest
10
10
 
11
11
  def teardown
12
12
  super
13
- @collection.close
13
+ @collection.close if @collection
14
14
  end
15
15
 
16
16
  def test_create
@@ -67,6 +67,7 @@ class RSyncCollectionTest < StellrTest
67
67
 
68
68
  def default_collection_options( options = {} )
69
69
  { :recreate => false,
70
- :path => INDEX_TMP_TEST_DIR }.update( options )
70
+ :path => INDEX_TMP_TEST_DIR,
71
+ :logger => Logger.new('/tmp/stellr/test.log') }.update( options )
71
72
  end
72
73
  end
@@ -56,6 +56,24 @@ class ServerTest < StellrTest
56
56
  assert_equal 2, @server.size( 'default' )
57
57
  end
58
58
 
59
+ def test_index_multiple_records_arrays
60
+ @server.register 'default'
61
+ @server.add_records 'default',
62
+ [ [ { :id => 1, :text => 'hello world' }, 2],
63
+ [ { :id => 2, :text => 'hello world two' }, nil ] ]
64
+ @server.batch_finished 'default'
65
+ assert_equal 2, @server.size( 'default' )
66
+ end
67
+
68
+ def test_index_multiple_records_hashes
69
+ @server.register 'default'
70
+ @server.add_records 'default',
71
+ [ { :id => 1, :text => 'hello world' },
72
+ { :id => 2, :text => 'hello world two' } ]
73
+ @server.batch_finished 'default'
74
+ assert_equal 2, @server.size( 'default' )
75
+ end
76
+
59
77
  def test_delete_data_queued
60
78
  coll = "del-queued"
61
79
  @server.register coll, :strategy => :queueing
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stellr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benjamin Krause
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2008-08-04 00:00:00 +02:00
13
+ date: 2008-08-13 00:00:00 +02:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency