elastictastic 0.5.0 → 0.10.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/LICENSE +1 -1
  2. data/README.md +161 -10
  3. data/lib/elastictastic/adapter.rb +84 -0
  4. data/lib/elastictastic/association.rb +6 -0
  5. data/lib/elastictastic/basic_document.rb +213 -0
  6. data/lib/elastictastic/bulk_persistence_strategy.rb +64 -19
  7. data/lib/elastictastic/callbacks.rb +18 -12
  8. data/lib/elastictastic/child_collection_proxy.rb +15 -11
  9. data/lib/elastictastic/client.rb +47 -24
  10. data/lib/elastictastic/configuration.rb +59 -4
  11. data/lib/elastictastic/dirty.rb +43 -28
  12. data/lib/elastictastic/discrete_persistence_strategy.rb +48 -23
  13. data/lib/elastictastic/document.rb +1 -85
  14. data/lib/elastictastic/embedded_document.rb +34 -0
  15. data/lib/elastictastic/errors.rb +17 -5
  16. data/lib/elastictastic/field.rb +3 -0
  17. data/lib/elastictastic/mass_assignment_security.rb +2 -4
  18. data/lib/elastictastic/middleware.rb +66 -84
  19. data/lib/elastictastic/multi_get.rb +30 -0
  20. data/lib/elastictastic/multi_search.rb +70 -0
  21. data/lib/elastictastic/nested_document.rb +3 -27
  22. data/lib/elastictastic/new_relic_instrumentation.rb +8 -8
  23. data/lib/elastictastic/observing.rb +8 -6
  24. data/lib/elastictastic/optimistic_locking.rb +57 -0
  25. data/lib/elastictastic/parent_child.rb +56 -54
  26. data/lib/elastictastic/persistence.rb +16 -16
  27. data/lib/elastictastic/properties.rb +136 -96
  28. data/lib/elastictastic/railtie.rb +1 -1
  29. data/lib/elastictastic/rotor.rb +105 -0
  30. data/lib/elastictastic/scope.rb +186 -56
  31. data/lib/elastictastic/server_error.rb +20 -1
  32. data/lib/elastictastic/test_helpers.rb +152 -97
  33. data/lib/elastictastic/thrift/constants.rb +12 -0
  34. data/lib/elastictastic/thrift/rest.rb +83 -0
  35. data/lib/elastictastic/thrift/types.rb +124 -0
  36. data/lib/elastictastic/thrift_adapter.rb +61 -0
  37. data/lib/elastictastic/transport_methods.rb +27 -0
  38. data/lib/elastictastic/validations.rb +11 -13
  39. data/lib/elastictastic/version.rb +1 -1
  40. data/lib/elastictastic.rb +148 -27
  41. data/spec/environment.rb +1 -1
  42. data/spec/examples/bulk_persistence_strategy_spec.rb +151 -23
  43. data/spec/examples/callbacks_spec.rb +65 -34
  44. data/spec/examples/dirty_spec.rb +160 -1
  45. data/spec/examples/document_spec.rb +168 -106
  46. data/spec/examples/middleware_spec.rb +1 -61
  47. data/spec/examples/multi_get_spec.rb +127 -0
  48. data/spec/examples/multi_search_spec.rb +113 -0
  49. data/spec/examples/observing_spec.rb +24 -3
  50. data/spec/examples/optimistic_locking_spec.rb +417 -0
  51. data/spec/examples/parent_child_spec.rb +73 -33
  52. data/spec/examples/properties_spec.rb +53 -0
  53. data/spec/examples/rotor_spec.rb +132 -0
  54. data/spec/examples/scope_spec.rb +78 -18
  55. data/spec/examples/search_spec.rb +26 -0
  56. data/spec/examples/validation_spec.rb +7 -1
  57. data/spec/models/author.rb +1 -1
  58. data/spec/models/blog.rb +2 -0
  59. data/spec/models/comment.rb +1 -1
  60. data/spec/models/photo.rb +9 -0
  61. data/spec/models/post.rb +3 -0
  62. metadata +97 -78
  63. data/lib/elastictastic/resource.rb +0 -4
  64. data/spec/examples/active_model_lint_spec.rb +0 -20
data/LICENSE CHANGED
@@ -1,5 +1,5 @@
1
1
  The MIT License (MIT)
2
- Copyright (c) 2011 Mat Brown
2
+ Copyright (c) 2011-2012 Brewster Inc., Mat Brown
3
3
 
4
4
  Permission is hereby granted, free of charge, to any person obtaining a copy of
5
5
  this software and associated documentation files (the "Software"), to deal in
data/README.md CHANGED
@@ -35,6 +35,8 @@ the `field` class macro:
35
35
 
36
36
  ```ruby
37
37
  class Post
38
+ include Elastictastic::Document
39
+
38
40
  field :title
39
41
  end
40
42
  ```
@@ -82,7 +84,27 @@ field :title,
82
84
  }
83
85
  ```
84
86
 
85
- ### Embedded Objects ###
87
+ ### Document Boost ###
88
+
89
+ Defining a
90
+ [document boost](http://www.elasticsearch.org/guide/reference/mapping/boost-field.html)
91
+ will increase or decrease a document's score in search results based on the
92
+ value of a field in the document. A boost of 1.0 is neutral. To define a boost
93
+ field, use the `boost` class macro:
94
+
95
+ ```ruby
96
+ class Post
97
+ include Elastictastic::Document
98
+
99
+ field :score, :type => 'integer'
100
+ boost :score
101
+ end
102
+ ```
103
+
104
+ By default, if the boost field is empty, a score of 1.0 will be applied. You can
105
+ override this by passing a `'null_value'` option into the boost method.
106
+
107
+ ### Embedded objects ###
86
108
 
87
109
  ElasticSearch supports deep nesting of properties by way of
88
110
  [object fields](http://www.elasticsearch.org/guide/reference/mapping/object-type.html).
@@ -98,13 +120,13 @@ class Post
98
120
  end
99
121
  ```
100
122
 
101
- The class that's embedded should include the `Elastictastic::Resource` mixin,
123
+ The class that's embedded should include the `Elastictastic::NestedDocument` mixin,
102
124
  which exposes the same configuration DSL as `Elastictastic::Document` but does
103
125
  not give the class the functionality of a top-level persistent object:
104
126
 
105
127
  ```ruby
106
128
  class Author
107
- include Elastictastic::Resource
129
+ include Elastictastic::NestedDocument
108
130
 
109
131
  field :name
110
132
  field :email, :index => 'not_analyzed'
@@ -182,13 +204,21 @@ explicit support for this at the moment, although you can use e.g.
182
204
  `Post.mapping` to retrieve the mapping structure which you can then merge into
183
205
  your template.
184
206
 
185
- ### Reserved Attributes ###
207
+ ### Reserved attributes ###
186
208
 
187
209
  All `Elastictastic::Document` models have an `id` and an `index` field, which
188
210
  combine to define the full resource locator for the document in ElasticSearch.
189
211
  You should not define fields or methods with these names. You may, however, set
190
212
  the id explicitly on new (not yet saved) model instances.
191
213
 
214
+ ### ActiveModel ###
215
+
216
+ Elastictastic documents include all the usual ActiveModel functionality:
217
+ validations, lifecycle hooks, observers, dirty-tracking, mass-assignment
218
+ security, and the like. If you would like to squeeze a bit of extra performance
219
+ out of the library at the cost of convenience, you can include the
220
+ `Elastictastic::BasicDocument` module instead of `Elastictastic::Document`.
221
+
192
222
  ## Persistence ##
193
223
 
194
224
  Elastictastic models are persisted the usual way, namely by calling `save`:
@@ -199,7 +229,7 @@ post.title = 'You know, for search.'
199
229
  post.save
200
230
  ```
201
231
 
202
- To retrieve a document from the data store, use `get`:
232
+ To retrieve a document from the data store, use `find`:
203
233
 
204
234
  ```ruby
205
235
  Post.find('123')
@@ -241,15 +271,136 @@ the `in_index` class method:
241
271
 
242
272
  ```ruby
243
273
  new_post = Post.in_index('my_special_index').new # create in an index
244
- post = Post.in_index('my_special_index').get('123') # retrieve from an index
274
+ post = Post.in_index('my_special_index').find('123') # retrieve from an index
245
275
  ```
246
276
 
247
277
  To retrieve documents from multiple indices at the same time, pass a hash into
248
- `get` where the keys are index names and the values are the IDs you wish to
278
+ `find` where the keys are index names and the values are the IDs you wish to
249
279
  retrieve from that index:
250
280
 
251
281
  ```ruby
252
- Post.get('default' => ['123', '456'], 'my_special_index' => '789')
282
+ Post.find('default' => ['123', '456'], 'my_special_index' => '789')
283
+ ```
284
+
285
+ ### Bulk operations ###
286
+
287
+ If you are writing a large amount of data to ElasticSearch in a single process,
288
+ use of the
289
+ [bulk API](http://www.elasticsearch.org/guide/reference/api/bulk.html)
290
+ is encouraged. To perform bulk operations using Elastictastic, simply wrap your
291
+ operations in a `bulk` block:
292
+
293
+ ```ruby
294
+ Elastictastic.bulk do
295
+ params[:posts].each do |post_params|
296
+ post = Post.new(post_params)
297
+ post.save
298
+ end
299
+ end
300
+ ```
301
+
302
+ All create, update, and destroy operations inside the block will be executed in
303
+ a single bulk request when the block completes. If you are performing an
304
+ indefinite number of operations in a bulk block, you can pass an `:auto_flush`
305
+ option to flush the bulk buffer after the specified number of operations:
306
+
307
+ ```ruby
308
+ Elastictastic.bulk(:auto_flush => 100) do
309
+ 150.times { Post.new.save! }
310
+ end
311
+ ```
312
+
313
+ The above will perform two bulk requests: the first after the first 100
314
+ operations, and the second when the block completes.
315
+
316
+ Note that the nature of bulk writes means that any operation inside a bulk block
317
+ is essentially asynchronous: instances are not created, updated, or destroyed
318
+ immediately upon calling `save` or `destroy`, but rather when the bulk block
319
+ exits. You may pass a block to `save` and `destroy` to provide a callback for
320
+ when the instance is actually persisted and its local state updated. Let's say,
321
+ for instance, we wish to expand the example above to pass the IDs of the newly
322
+ created posts to our view layer:
323
+
324
+ ```ruby
325
+ @ids = []
326
+ Elastictastic.bulk do
327
+ params[:posts].each do |post_params|
328
+ post = Post.new(post_params)
329
+ post.save do |e|
330
+ @ids << post.id
331
+ end
332
+ end
333
+ end
334
+ ```
335
+
336
+ If the save was not successful (due to a duplicate ID or a version mismatch,
337
+ for instance), the `e` argument to the block will be an exception object;
338
+ if the save was successful, the argument will be nil.
339
+
340
+ ### Concurrent document creation ###
341
+
342
+ When Elastictastic creates a document with an application-defined ID, it uses
343
+ the `_create` verb in ElasticSearch, ensuring that a document with that ID does
344
+ not already exist. If the document does already exist, an
345
+ `Elastictastic::ServerError::DocumentAlreadyExistsEngineException` will be
346
+ raised. In the case where multiple processes may attempt concurrent creation of
347
+ the same document, you can gracefully handle concurrent creation using the
348
+ `::create_or_update` class method on your document class. This will first
349
+ attempt to create the document; if a document with that ID already exists, it
350
+ will then load the document and modify it using the block passed:
351
+
352
+ ```ruby
353
+ Post.create_or_update('1') do |post|
354
+ post.title = 'My Post'
355
+ end
356
+ ```
357
+
358
+ In the above case, Elastictastic will first attempt to create a new post with ID
359
+ "1" and title "My Post". If a Post with that ID already exists, it will load it,
360
+ set its title to "My Post", and save it. The update uses the `::update` method
361
+ (see next section) to ensure that concurrent modification doesn't cause data to
362
+ be lost.
363
+
364
+ ### Optimistic locking ###
365
+
366
+ Elastictastic provides optimistic locking via ElasticSearch's built-in
367
+ [document versioning](http://www.elasticsearch.org/guide/reference/api/index_.html).
368
+ When a document is retrieved from persistence, it carries a version, which is a
369
+ number that increments from 1 on each update. When an Elastictastic model is
370
+ updated, the document version that it carried when it was loaded is passed into
371
+ the update operation; if this version does not match ElasticSearch's current
372
+ version for that document, it indicates that another process has modified the
373
+ document concurrently, and an
374
+ `Elastictastic::ServerError::VersionConflictEngineException` is raised. This
375
+ prevents data loss through concurrent conflicting updates.
376
+
377
+ The easiest way to guard against concurrent modification is to use the
378
+ `::update` class method to make changes to existing documents. Consider the
379
+ following example:
380
+
381
+ ```ruby
382
+ Post.update('12345') do |post|
383
+ post.title = 'New Title'
384
+ end
385
+ ```
386
+
387
+ In the above, the Post with ID '12345' is loaded from ElasticSearch and yielded
388
+ to the block. When the block completes, the instance is saved back to
389
+ ElasticSearch. If this save results in a version conflict, a new instance is
390
+ loaded from ElasticSearch and the block is run again. The process repeats until
391
+ a successful update.
392
+
393
+ This method will work inside a bulk operation, but note that if the first update
394
+ generates a version conflict, additional updates will occur in discrete
395
+ requests, not as part of any bulk operation.
396
+
397
+ If you wish to safely update documents retrieved from a search scope
398
+ (see below), use the `update_each` method:
399
+
400
+ ```ruby
401
+ Post.query { constant_score { filter { term(:blog_id => 1) }}}.update_each do |post|
402
+ post.title = post.title.upcase
403
+ end
253
404
  ```
254
405
 
255
406
  ## Search ##
@@ -263,7 +414,7 @@ or Mongoid:
263
414
 
264
415
  ```ruby
265
416
  Post.query(:query_string => { :query => 'pizza' }).facets(:cuisine => { :term => { :field => :tags }}).from(10).size(10)
266
- # Generates { :query => { :query_string => { :query => 'pizza' }}, :facets => { :cuisine => { :term => { :field => :tags }}}, :from => 10, :size => 10 }
417
+ # Generates {"query": {"query_string": {"query": "pizza"}}, "facets": {"cuisine": {"term": {"field": "tags" }}}, "from": 10, "size": 10}
267
418
  ```
268
419
 
269
420
  Elastictastic also has an alternate block-based query builder, if you prefer:
@@ -307,7 +458,7 @@ Post.highlight { fields(:title => {}) }.find_each do |post, hit|
307
458
  end
308
459
  ```
309
460
 
310
- Search scope also expose a #find_in_batches method, which also yields the raw
461
+ Search scopes also expose a `#find_in_batches` method, which also yields the raw
311
462
  hit. The following code gives the same result as the previous example:
312
463
 
313
464
  ```ruby
@@ -0,0 +1,84 @@
1
+ require 'elastictastic/transport_methods'
2
+
3
+ module Elastictastic
4
+
5
+ class Adapter
6
+
7
+ include TransportMethods
8
+
9
+ Response = Struct.new(:status, :headers, :body)
10
+
11
+ def self.[](str)
12
+ case str
13
+ when nil then NetHttpAdapter
14
+ when /^[a-z_]+$/ then Elastictastic.const_get("#{str.to_s.classify}Adapter")
15
+ else str.constantize
16
+ end
17
+ end
18
+
19
+ def initialize(host, options = {})
20
+ @host = host
21
+ @request_timeout = options[:request_timeout]
22
+ @connect_timeout = options[:connect_timeout]
23
+ end
24
+
25
+ end
26
+
27
+ class NetHttpAdapter < Adapter
28
+
29
+ def initialize(host, options = {})
30
+ super
31
+ uri = URI.parse(host)
32
+ @connection = Net::HTTP.new(uri.host, uri.port)
33
+ @connection.read_timeout = @request_timeout
34
+ end
35
+
36
+ def request(method, path, body = nil)
37
+ response =
38
+ case method
39
+ when :head then @connection.head(path)
40
+ when :get then @connection.get(path)
41
+ when :post then @connection.post(path, body.to_s)
42
+ when :put then @connection.put(path, body.to_s)
43
+ when :delete then @connection.delete(path)
44
+ else raise ArgumentError, "Unsupported method #{method.inspect}"
45
+ end
46
+ Response.new(response.code.to_i, response.to_hash, response.body)
47
+ rescue Errno::ECONNREFUSED, Timeout::Error, SocketError => e
48
+ raise ConnectionFailed, e
49
+ end
50
+
51
+ end
52
+
53
+ class ExconAdapter < Adapter
54
+
55
+ def request(method, path, body = nil)
56
+ response = connection.request(
57
+ :body => body, :method => method, :path => path
58
+ )
59
+ Response.new(response.status, response.headers, response.body)
60
+ rescue Excon::Errors::Error => e
61
+ connection.reset
62
+ raise ConnectionFailed, e
63
+ end
64
+
65
+ private
66
+
67
+ def connection
68
+ @connection ||= Excon.new(@host, connection_params)
69
+ end
70
+
71
+ def connection_params
72
+ @connection_params ||= {}.tap do |params|
73
+ if @request_timeout
74
+ params[:read_timeout] = params[:write_timeout] = @request_timeout
75
+ end
76
+ if @connect_timeout
77
+ params[:connect_timeout] = @connect_timeout
78
+ end
79
+ end
80
+ end
81
+
82
+ end
83
+
84
+ end
@@ -1,4 +1,10 @@
1
1
  module Elastictastic
2
+ #
3
+ # Container for information about generic Elastictastic associations --
4
+ # this might be an embed association or a parent/child association.
5
+ #
6
+ # @api private
7
+ #
2
8
  class Association
3
9
  attr_reader :name, :options
4
10
 
@@ -0,0 +1,213 @@
1
+ module Elastictastic
2
+ #
3
+ # The top-level module mixed in to classes which will be mapped as
4
+ # ElasticSearch documents. Note that most people will want to use the Document
5
+ # mixin, which extends BasicDocument with ActiveModel functionality such as
6
+ # validations, lifecycle hooks, observers, mass-assignment security, etc. The
7
+ # BasicDocument module is exposed directly for those who wish to avoid the
8
+ # performance penalty associated with ActiveModel functionality, or those who
9
+ # wish to only mix in the ActiveModel modules they need.
10
+ #
11
+ # Most of the functionality for BasicDocument is provided by submodules; see
12
+ # below.
13
+ #
14
+ # @see Document
15
+ # @see Scoped
16
+ # @see Properties
17
+ # @see Persistence
18
+ # @see OptimisticLocking
19
+ # @see ParentChild
20
+ #
21
+ module BasicDocument
22
+ extend ActiveSupport::Concern
23
+
24
+ included do
25
+ extend Scoped
26
+ include Properties
27
+ include Persistence
28
+ include OptimisticLocking
29
+ include ParentChild
30
+
31
+ extend ActiveModel::Naming
32
+ include ActiveModel::Conversion
33
+ include ActiveModel::Serializers::JSON
34
+ include ActiveModel::Serializers::Xml
35
+
36
+ self.include_root_in_json = false
37
+ end
38
+
39
+ module ClassMethods
40
+ #
41
+ # Retrieve one or more documents by ID.
42
+ #
43
+ # @param (see Elastictastic::Scope#find)
44
+ # @overload (see Elastictastic::Scope#find)
45
+ #
46
+
47
+ #
48
+ # @method destroy_all
49
+ #
50
+ # Destroy all instances of this class in the default index
51
+ #
52
+
53
+ #
54
+ # @method sync_mapping
55
+ #
56
+ # Push the mapping defined in this class to ElasticSearch. Be sure to do
57
+ # this before saving instances of your class, or after making changes to
58
+ # the class's mapping (e.g. adding fields)
59
+ #
60
+
61
+ #
62
+ # @method find_each(batch_options = {}) {|document, hit| ... }
63
+ #
64
+ # Iterate over all documents in the default index, retrieving documents
65
+ # in batches using a cursor, but yielding them one by one.
66
+ #
67
+ # @param (see Elastictastic::Scope#find_each)
68
+ # @option (see Elastictastic::Scope#find_each)
69
+ # @yield (see Elastictastic::Scope#find_each)
70
+ # @yieldparam (see Elastictastic::Scope#find_each)
71
+ # @return (see Elastictastic::Scope#find_each)
72
+ #
73
+
74
+ #
75
+ # @method find_in_batches(batch_options = {}) {|batch| ... }
76
+ #
77
+ # Retrieve all documents in the default index, yielding them in batches.
78
+ #
79
+ # @param (see Elastictastic::Scope#find_in_batches)
80
+ # @option (see Elastictastic::Scope#find_in_batches)
81
+ # @yield (see Elastictastic::Scope#find_in_batches)
82
+ # @yieldparam (see Elastictastic::Scope#find_in_batches)
83
+ # @return (see Elastictastic::Scope#find_in_batches)
84
+ #
85
+
86
+ #
87
+ # @method first
88
+ #
89
+ # @return [Document] The "first" document in the index ("first" is
90
+ # undefined).
91
+ #
92
+
93
+ #
94
+ # @method count
95
+ #
96
+ # @return [Fixnum] The number of documents of this type in the default index.
97
+ #
98
+
99
+ #
100
+ # @method empty?
101
+ #
102
+ # @return [TrueClass,FalseClass] True if there are no documents of this
103
+ # type in the default index.
104
+ #
105
+
106
+ #
107
+ # @method any?
108
+ #
109
+ # @return [TrueClass,FalseClass] True if there are documents of this type
110
+ # in the default index.
111
+ #
112
+
113
+ delegate :find, :destroy_all, :sync_mapping, :inspect, :find_each,
114
+ :find_in_batches, :first, :count, :empty?, :any?, :all,
115
+ :query, :filter, :from, :size, :sort, :highlight, :fields,
116
+ :script_fields, :preference, :facets, :routing,
117
+ :to => :current_scope
118
+
119
+ def mapping
120
+ mapping_for_type = { 'properties' => properties }
121
+ mapping_for_type['_boost'] = @_boost if @_boost
122
+ if @_routing_field
123
+ mapping_for_type['_routing'] = {
124
+ 'path' => @_routing_field.to_s,
125
+ 'required' => @_routing_required
126
+ }
127
+ end
128
+ { type => mapping_for_type }
129
+ end
130
+
131
+ def type
132
+ name.underscore
133
+ end
134
+
135
+ def in_index(name_or_index)
136
+ Scope.new(Elastictastic::Index(name_or_index), self)
137
+ end
138
+
139
+ def scoped(params)
140
+ current_scope.scoped(params)
141
+ end
142
+
143
+ private
144
+
145
+ def default_scope
146
+ in_index(Index.default)
147
+ end
148
+ end
149
+
150
+ attr_reader :id
151
+ attr_accessor :version
152
+
153
+ def initialize(attributes = {})
154
+ self.class.current_scope.initialize_instance(self)
155
+ end
156
+
157
+ def reload
158
+ params = {}
159
+ params['routing'] = @_parent_id if @_parent_id
160
+ self.elasticsearch_hit =
161
+ Elastictastic.client.get(index, self.class.type, id, params)
162
+ end
163
+
164
+ def elasticsearch_hit=(hit) #:nodoc:
165
+ @id = hit['_id']
166
+ @index = Index.new(hit['_index'])
167
+ @version = hit['_version']
168
+ persisted!
169
+
170
+ doc = {}
171
+ doc.merge!(hit['_source']) if hit['_source']
172
+ fields = hit['fields']
173
+ if fields
174
+ unflattened_fields =
175
+ Util.unflatten_hash(fields.reject { |k, v| v.nil? })
176
+ if unflattened_fields.has_key?('_source')
177
+ doc.merge!(unflattened_fields.delete('_source'))
178
+ end
179
+ doc.merge!(unflattened_fields)
180
+ end
181
+ self.elasticsearch_doc=(doc)
182
+ end
183
+
184
+ def id=(id)
185
+ assert_transient!
186
+ @id = id
187
+ end
188
+
189
+ def index
190
+ return @index if defined? @index
191
+ @index = Index.default
192
+ end
193
+
194
+ def ==(other)
195
+ index == other.index && id == other.id
196
+ end
197
+
198
+ def attributes
199
+ { :id => id, :index => index.name }
200
+ end
201
+
202
+ def inspect
203
+ inspected = "#<#{self.class.name} id: #{id}, index: #{index.name}"
204
+ attributes.each_pair do |attr, value|
205
+ inspected << ", #{attr}: #{value.inspect}"
206
+ end
207
+ embeds.each_pair do |attr, value|
208
+ inspected << ", #{attr}: #{value.inspect}"
209
+ end
210
+ inspected << ">"
211
+ end
212
+ end
213
+ end
@@ -2,51 +2,90 @@ require 'stringio'
2
2
 
3
3
  module Elastictastic
4
4
  class BulkPersistenceStrategy
5
- def initialize
6
- @buffer = StringIO.new
7
- @handlers = []
5
+ DEFAULT_HANDLER = proc { |e| raise(e) if e }
6
+ Operation = Struct.new(:id, :commands, :handler, :skip)
7
+
8
+ def initialize(options)
9
+ @operations = []
10
+ @operations_by_id = {}
11
+ @auto_flush = options.delete(:auto_flush)
8
12
  end
9
13
 
10
- def create(instance, params = {})
14
+ def create(instance, params = {}, &block)
15
+ block ||= DEFAULT_HANDLER
11
16
  if instance.pending_save?
12
17
  raise Elastictastic::OperationNotAllowed,
13
18
  "Can't re-save transient document with pending save in bulk operation"
14
19
  end
15
20
  instance.pending_save!
16
21
  add(
22
+ instance.index,
23
+ instance.id,
17
24
  { 'create' => bulk_identifier(instance) },
18
25
  instance.elasticsearch_doc
19
26
  ) do |response|
20
- instance.id = response['create']['_id']
21
- instance.persisted!
27
+ if response['create']['error']
28
+ block.call(ServerError[response['create']['error']])
29
+ else
30
+ instance.id = response['create']['_id']
31
+ instance.version = response['create']['_version']
32
+ instance.persisted!
33
+ block.call
34
+ end
22
35
  end
23
36
  end
24
37
 
25
- def update(instance)
38
+ def update(instance, &block)
39
+ block ||= DEFAULT_HANDLER
26
40
  instance.pending_save!
27
41
  add(
42
+ instance.index,
43
+ instance.id,
28
44
  { 'index' => bulk_identifier(instance) },
29
45
  instance.elasticsearch_doc
30
- )
46
+ ) do |response|
47
+ if response['index']['error']
48
+ block.call(ServerError[response['index']['error']])
49
+ else
50
+ instance.version = response['index']['_version']
51
+ block.call
52
+ end
53
+ end
31
54
  end
32
55
 
33
- def destroy(instance)
56
+ def destroy(instance, &block)
57
+ block ||= DEFAULT_HANDLER
34
58
  instance.pending_destroy!
35
- add(:delete => bulk_identifier(instance)) do |response|
36
- instance.transient!
59
+ add(instance.index, instance.id, :delete => bulk_identifier(instance)) do |response|
60
+ if response['delete']['error']
61
+ block.call(ServerError[response['delete']['error']])
62
+ else
63
+ instance.transient!
64
+ instance.version = response['delete']['_version']
65
+ block.call
66
+ end
37
67
  end
38
68
  end
39
69
 
40
70
  def flush
41
- return if @buffer.length.zero?
71
+ return if @operations.empty?
42
72
 
43
73
  params = {}
44
74
  params[:refresh] = true if Elastictastic.config.auto_refresh
45
- response = Elastictastic.client.bulk(@buffer.string, params)
75
+ io = StringIO.new
76
+ operations = @operations.reject { |operation| operation.skip }
77
+ @operations.clear
78
+
79
+ operations.each do |operation|
80
+ operation.commands.each do |command|
81
+ io.puts Elastictastic.json_encode(command)
82
+ end
83
+ end
84
+ response = Elastictastic.client.bulk(io.string, params)
46
85
 
47
86
  response['items'].each_with_index do |op_response, i|
48
- handler = @handlers[i]
49
- handler.call(op_response) if handler
87
+ operation = operations[i]
88
+ operation.handler.call(op_response) if operation.handler
50
89
  end
51
90
  response
52
91
  end
@@ -56,15 +95,21 @@ module Elastictastic
56
95
  def bulk_identifier(instance)
57
96
  identifier = { :_index => instance.index.name, :_type => instance.class.type }
58
97
  identifier['_id'] = instance.id if instance.id
98
+ identifier['_version'] = instance.version if instance.version
99
+ routing = instance.class.route(instance)
100
+ identifier['_routing'] = routing.to_s if routing
59
101
  identifier['parent'] = instance._parent_id if instance._parent_id
60
102
  identifier
61
103
  end
62
104
 
63
- def add(*requests, &block)
64
- requests.each do |request|
65
- @buffer.puts(request.to_json)
105
+ def add(index, id, *commands, &block)
106
+ document_id = [index.name, id]
107
+ if id && @operations_by_id.key?(document_id)
108
+ @operations_by_id[document_id].skip = true
66
109
  end
67
- @handlers << block
110
+ @operations << operation = Operation.new(id, commands, block)
111
+ @operations_by_id[document_id] = operation
112
+ flush if @auto_flush && @operations.length >= @auto_flush
68
113
  end
69
114
  end
70
115
  end