stellr 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,7 @@
1
+ == 0.1.1 / 2008-08-06
2
+
3
+ * Add support for bulk indexing of multiple records
4
+
1
5
  == 0.1.0 / 2008-08-05
2
6
 
3
7
  * Initial release
data/README.txt CHANGED
@@ -87,7 +87,7 @@ Stellr is a Ferret based standalone search server featuring a DRB and (soon to c
87
87
 
88
88
  (The MIT License)
89
89
 
90
- Copyright (c) 2007 FIX
90
+ Copyright (c) 2008 Jens Kraemer, Benjamin Krause
91
91
 
92
92
  Permission is hereby granted, free of charge, to any person obtaining
93
93
  a copy of this software and associated documentation files (the
@@ -18,7 +18,7 @@ require 'stellr/search'
18
18
  $SAFE = 1
19
19
 
20
20
  module Stellr
21
- VERSION = '0.1.0'
21
+ VERSION = '0.1.1'
22
22
 
23
23
  def self.start_server( config )
24
24
  if config.script
@@ -6,12 +6,14 @@ module Stellr
6
6
  include Ferret::Index
7
7
  include Stellr::Utils::Shutdown
8
8
  include Stellr::Utils::Observable
9
- attr_reader :name
9
+ attr_reader :name, :logger
10
10
 
11
11
  def self.create( name, options )
12
+ log = (options[:logger] ||= (require 'logger'; Logger.new 'stellr.log'))
12
13
  collection_class = collection_class_for_options options
13
14
  collection = collection_class.new( name, options )
14
15
  if strategy_class = strategy_class_for_options( options )
16
+ log.debug "using strategy #{strategy_class}"
15
17
  strategy_class.new( collection, options )
16
18
  else
17
19
  collection
@@ -19,11 +21,11 @@ module Stellr
19
21
  end
20
22
 
21
23
  def initialize( name, options )
22
- @logger = options[:logger] || (require 'logger'; Logger.new 'stellr.log')
24
+ @logger = options[:logger]
23
25
  @name = name
24
26
  @options = options.dup
25
27
  end
26
-
28
+
27
29
  # called whenever the strategy thinks it's a good time to do something
28
30
  # timeconsuming (like switching indexes, optimizing, flushing, ...)
29
31
  def batch_finished
@@ -29,7 +29,9 @@ module Stellr
29
29
  end
30
30
 
31
31
  def sync_indexes
32
+ logger.debug "syncing #{searching_directory} to #{indexing_directory} ..."
32
33
  system("rsync -r --delete #{searching_directory}/ #{indexing_directory}")
34
+ logger.debug "done."
33
35
  end
34
36
 
35
37
  end
@@ -15,32 +15,39 @@ module Stellr
15
15
  #
16
16
  # Record may be a hash, or a Ferret::Document instance
17
17
  def add_record( record, boost = nil )
18
- raise ArgumentError.new("record must contain :id field") if record[:id].nil?
19
- if boost
20
- if Ferret::Document === record
21
- record.boost = boost
22
- else
23
- hash, record = record, Ferret::Document.new( boost )
24
- hash.each_pair do |k,v|
25
- record[k] = v
26
- end
27
- end
18
+ add_records [ [ record, boost ] ]
19
+ end
20
+ alias :<< :add_record
21
+
22
+ # adds multiple records at once
23
+ # records should be an array of hashes or of two-element arrays
24
+ # consisting of a hash and the record-specific boost value
25
+ def add_records(records)
26
+ return unless records.any?
27
+ records = if Hash === records.first
28
+ records.map{ |r| prepare_document r }
29
+ else
30
+ records.map{ |r, boost| prepare_document(r, boost) }
28
31
  end
29
32
  @writer_monitor.synchronize do
30
- @processed_records += 1
31
33
  w = writer
32
- w.delete :id, record[:id].to_s # ensure uniqueness by :id field
33
- w << record
34
+ records.each do |record|
35
+ @processed_records += 1
36
+ w.delete :id, record[:id].to_s # ensure uniqueness by :id field
37
+ w << record
38
+ end
39
+ w.commit
34
40
  end
35
41
  true
36
42
  end
37
- alias :<< :add_record
38
43
 
39
44
  def delete_record( record )
40
45
  raise ArgumentError.new("record must contain :id field") if record[:id].nil?
41
46
  @writer_monitor.synchronize do
47
+ w = writer
42
48
  @processed_records += 1
43
- writer.delete :id, record[:id].to_s
49
+ w.delete :id, record[:id].to_s
50
+ w.commit
44
51
  end
45
52
  true
46
53
  end
@@ -80,6 +87,21 @@ module Stellr
80
87
  end
81
88
 
82
89
  protected
90
+
91
+ def prepare_document(record, boost = nil)
92
+ raise ArgumentError.new("record must contain :id field") if record[:id].nil?
93
+ unless boost.nil?
94
+ if Ferret::Document === record
95
+ record.boost = boost
96
+ else
97
+ hash, record = record, Ferret::Document.new( boost )
98
+ hash.each_pair do |k,v|
99
+ record[k] = v
100
+ end
101
+ end
102
+ end
103
+ return record
104
+ end
83
105
 
84
106
  # should open a writer and return it
85
107
  def open_writer
@@ -68,8 +68,12 @@ module Stellr
68
68
  #
69
69
  #
70
70
  def register( name, options = {} )
71
+ raise "invalid collection name >#{name}<, may only contain a-zA-Z0-9_-" unless name =~ /^([a-zA-Z0-9_-]+)$/
72
+ name.untaint
71
73
  @collections.synchronize do
72
- @collections[name] ||= create_collection( name, options )
74
+ collection = (@collections[name] ||= create_collection( name, options ))
75
+ save_collection_config name, options unless options.nil? or options.empty?
76
+ collection
73
77
  end
74
78
  end
75
79
 
@@ -117,24 +121,19 @@ module Stellr
117
121
  # if nil is given for options, the method tries to locate a previously
118
122
  # saved collection configuration and restore from it.
119
123
  def create_collection( name, options )
120
- raise "invalid collection name >#{name}<, may only contain a-zA-Z0-9_-" unless name =~ /^([a-zA-Z0-9_-]+)$/
121
- name.untaint
122
- save_config = true
123
- if options.nil?
124
- options = load_collection_config name
125
- save_config = false
126
- end
124
+ options ||= load_collection_config name
127
125
  raise "No options given for collection #{name} and no stored configuration found." if options.nil?
128
126
 
129
127
  options[:path] = File.join( @config.data_dir, name )
130
- save_collection_config name, options if save_config
131
128
  return Collections::Base.create( name, {:logger => @logger}.merge(options) )
132
129
  end
133
130
 
131
+ # TODO move into collection?
134
132
  def save_collection_config( name, options )
135
133
  path = collection_config_path name
136
134
  ( File.open(path, 'w') << YAML.dump(options) ).close
137
135
  @logger.info "wrote collection config to #{path}"
136
+ @logger.debug "config is now:\n#{options.inspect}"
138
137
  end
139
138
 
140
139
  def load_collection_config( name )
@@ -4,7 +4,21 @@ module Stellr
4
4
 
5
5
  # Queueing strategy. Any index modifying methods return immediately, actions
6
6
  # are queued and executed asynchronously in order of arrival.
7
- #
7
+ #
8
+ # Unless you're using the static collection type, indexes will be switched
9
+ # whenever options[:max_batch_size] (which defaults to 200) is reached,
10
+ # and when the queue is empty.
11
+ #
12
+ # With static collections, manually calling switch is required, and this call
13
+ # will block until the switch is actually done.
14
+ #
15
+ # However, this does not mean that
16
+ # all records from the queue have been processed; the switch may also occur
17
+ # between processing of add_record or add_records calls.
18
+ #
19
+ # FIXME fix this: switch should be an operation that is enqueued just like
20
+ # add_record so it occurs at the point in time the client desires.
21
+ # Is implicit switching really that useful? Definitely not with static collections...
8
22
  class Queueing < Base
9
23
 
10
24
  def initialize( collection, options )
@@ -18,6 +32,10 @@ module Stellr
18
32
  enqueue :add, [record, boost]
19
33
  end
20
34
 
35
+ def add_records(records)
36
+ enqueue :bulk_add, records
37
+ end
38
+
21
39
  def delete_record( record )
22
40
  enqueue :delete, record
23
41
  end
@@ -47,12 +65,15 @@ module Stellr
47
65
  end
48
66
 
49
67
  # process a single task from the queue
50
- def process_record( action, record )
68
+ # TODO refactoring: rename to process_task
69
+ def process_record( action, data )
51
70
  case action
52
71
  when :add
53
- @collection.add_record( *record )
72
+ @collection.add_record( *data )
73
+ when :bulk_add
74
+ @collection.add_records data
54
75
  when :delete
55
- @collection.delete_record record
76
+ @collection.delete_record data
56
77
  else
57
78
  raise "UnknownAction"
58
79
  end
@@ -10,7 +10,7 @@ class RSyncCollectionTest < StellrTest
10
10
 
11
11
  def teardown
12
12
  super
13
- @collection.close
13
+ @collection.close if @collection
14
14
  end
15
15
 
16
16
  def test_create
@@ -67,6 +67,7 @@ class RSyncCollectionTest < StellrTest
67
67
 
68
68
  def default_collection_options( options = {} )
69
69
  { :recreate => false,
70
- :path => INDEX_TMP_TEST_DIR }.update( options )
70
+ :path => INDEX_TMP_TEST_DIR,
71
+ :logger => Logger.new('/tmp/stellr/test.log') }.update( options )
71
72
  end
72
73
  end
@@ -56,6 +56,24 @@ class ServerTest < StellrTest
56
56
  assert_equal 2, @server.size( 'default' )
57
57
  end
58
58
 
59
+ def test_index_multiple_records_arrays
60
+ @server.register 'default'
61
+ @server.add_records 'default',
62
+ [ [ { :id => 1, :text => 'hello world' }, 2],
63
+ [ { :id => 2, :text => 'hello world two' }, nil ] ]
64
+ @server.batch_finished 'default'
65
+ assert_equal 2, @server.size( 'default' )
66
+ end
67
+
68
+ def test_index_multiple_records_hashes
69
+ @server.register 'default'
70
+ @server.add_records 'default',
71
+ [ { :id => 1, :text => 'hello world' },
72
+ { :id => 2, :text => 'hello world two' } ]
73
+ @server.batch_finished 'default'
74
+ assert_equal 2, @server.size( 'default' )
75
+ end
76
+
59
77
  def test_delete_data_queued
60
78
  coll = "del-queued"
61
79
  @server.register coll, :strategy => :queueing
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stellr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benjamin Krause
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2008-08-04 00:00:00 +02:00
13
+ date: 2008-08-13 00:00:00 +02:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency