elastictastic 0.5.0 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. data/LICENSE +1 -1
  2. data/README.md +161 -10
  3. data/lib/elastictastic/adapter.rb +84 -0
  4. data/lib/elastictastic/association.rb +6 -0
  5. data/lib/elastictastic/basic_document.rb +213 -0
  6. data/lib/elastictastic/bulk_persistence_strategy.rb +64 -19
  7. data/lib/elastictastic/callbacks.rb +18 -12
  8. data/lib/elastictastic/child_collection_proxy.rb +15 -11
  9. data/lib/elastictastic/client.rb +47 -24
  10. data/lib/elastictastic/configuration.rb +59 -4
  11. data/lib/elastictastic/dirty.rb +43 -28
  12. data/lib/elastictastic/discrete_persistence_strategy.rb +48 -23
  13. data/lib/elastictastic/document.rb +1 -85
  14. data/lib/elastictastic/embedded_document.rb +34 -0
  15. data/lib/elastictastic/errors.rb +17 -5
  16. data/lib/elastictastic/field.rb +3 -0
  17. data/lib/elastictastic/mass_assignment_security.rb +2 -4
  18. data/lib/elastictastic/middleware.rb +66 -84
  19. data/lib/elastictastic/multi_get.rb +30 -0
  20. data/lib/elastictastic/multi_search.rb +70 -0
  21. data/lib/elastictastic/nested_document.rb +3 -27
  22. data/lib/elastictastic/new_relic_instrumentation.rb +8 -8
  23. data/lib/elastictastic/observing.rb +8 -6
  24. data/lib/elastictastic/optimistic_locking.rb +57 -0
  25. data/lib/elastictastic/parent_child.rb +56 -54
  26. data/lib/elastictastic/persistence.rb +16 -16
  27. data/lib/elastictastic/properties.rb +136 -96
  28. data/lib/elastictastic/railtie.rb +1 -1
  29. data/lib/elastictastic/rotor.rb +105 -0
  30. data/lib/elastictastic/scope.rb +186 -56
  31. data/lib/elastictastic/server_error.rb +20 -1
  32. data/lib/elastictastic/test_helpers.rb +152 -97
  33. data/lib/elastictastic/thrift/constants.rb +12 -0
  34. data/lib/elastictastic/thrift/rest.rb +83 -0
  35. data/lib/elastictastic/thrift/types.rb +124 -0
  36. data/lib/elastictastic/thrift_adapter.rb +61 -0
  37. data/lib/elastictastic/transport_methods.rb +27 -0
  38. data/lib/elastictastic/validations.rb +11 -13
  39. data/lib/elastictastic/version.rb +1 -1
  40. data/lib/elastictastic.rb +148 -27
  41. data/spec/environment.rb +1 -1
  42. data/spec/examples/bulk_persistence_strategy_spec.rb +151 -23
  43. data/spec/examples/callbacks_spec.rb +65 -34
  44. data/spec/examples/dirty_spec.rb +160 -1
  45. data/spec/examples/document_spec.rb +168 -106
  46. data/spec/examples/middleware_spec.rb +1 -61
  47. data/spec/examples/multi_get_spec.rb +127 -0
  48. data/spec/examples/multi_search_spec.rb +113 -0
  49. data/spec/examples/observing_spec.rb +24 -3
  50. data/spec/examples/optimistic_locking_spec.rb +417 -0
  51. data/spec/examples/parent_child_spec.rb +73 -33
  52. data/spec/examples/properties_spec.rb +53 -0
  53. data/spec/examples/rotor_spec.rb +132 -0
  54. data/spec/examples/scope_spec.rb +78 -18
  55. data/spec/examples/search_spec.rb +26 -0
  56. data/spec/examples/validation_spec.rb +7 -1
  57. data/spec/models/author.rb +1 -1
  58. data/spec/models/blog.rb +2 -0
  59. data/spec/models/comment.rb +1 -1
  60. data/spec/models/photo.rb +9 -0
  61. data/spec/models/post.rb +3 -0
  62. metadata +97 -78
  63. data/lib/elastictastic/resource.rb +0 -4
  64. data/spec/examples/active_model_lint_spec.rb +0 -20
@@ -1,12 +1,12 @@
1
1
  require 'hashie'
2
+ require 'elastictastic/search'
2
3
 
3
4
  module Elastictastic
4
5
  class Scope < BasicObject
5
6
  attr_reader :clazz, :index
6
7
 
7
- def initialize(index, clazz, search = Search.new, parent_collection = nil)
8
- @index, @clazz, @search, @parent_collection =
9
- index, clazz, search, parent_collection
8
+ def initialize(index, clazz, search = Search.new, parent = nil, routing = nil)
9
+ @index, @clazz, @search, @parent, @routing = index, clazz, search, parent, routing
10
10
  end
11
11
 
12
12
  def initialize_instance(instance)
@@ -26,6 +26,29 @@ module Elastictastic
26
26
  end
27
27
  end
28
28
 
29
+ #
30
+ # Iterate over all documents matching this scope. The underlying mechanism
31
+ # used differs depending on the construction of this scope:
32
+ #
33
+ # * If the scope has a size, documents will be retrieved in a single request
34
+ # * If the scope has a sort but no size, documents will be retrieved in
35
+ # batches using a `query_then_fetch` search. *In this case, it is
36
+ # impossible to guarantee a consistent result set if concurrent
37
+ # modification is occurring.*
38
+ # * If the scope has neither a sort nor a size, documents will be retrieved
39
+ # in batches using a cursor (search type `scan`). In this case, the result
40
+ # set is guaranteed to be consistent even if concurrent modification
41
+ # occurs.
42
+ #
43
+ # @param (see #find_in_batches)
44
+ # @option (see #find_in_batches)
45
+ # @yield [document, hit] Each result is yielded to the block
46
+ # @yieldparam [Document] document A materialized Document instance
47
+ # @yieldparam [Hashie::Mash] hit The raw hit from ElasticSearch, wrapped in
48
+ # a Hashie::Mash. Useful for extracting metadata, e.g. highlighting
49
+ # @return [Enumerator] An enumerator, if no block is passed
50
+ # @see http://www.elasticsearch.org/guide/reference/api/search/search-type.html
51
+ #
29
52
  def find_each(batch_options = {}, &block)
30
53
  if block
31
54
  find_in_batches(batch_options) { |batch| batch.each(&block) }
@@ -34,6 +57,20 @@ module Elastictastic
34
57
  end
35
58
  end
36
59
 
60
+ #
61
+ # Yield batches of documents matching this scope. See #find_each for a
62
+ # discussion of different strategies for retrieving documents from
63
+ # ElasticSearch depending on the construction of this scope.
64
+ #
65
+ # @option batch_options [Fixnum] :batch_size (Elastictastic.config.default_batch_size)
66
+ # How many documents to retrieve from the server in each batch.
67
+ # @option batch_options [Fixnum] :ttl (60) How long to keep the cursor
68
+ # alive, in the case where search is performed with a cursor.
69
+ # @yield [batch] Once for each batch of hits
70
+ # @yieldparam [Enumerator] batch An enumerator for this batch of hits.
71
+ # The enumerator will yield a materialized Document and a Hashie::Mash wrapping each raw hit.
72
+ # @return [Enumerator] An enumerator that yields batches, if no block is passed.
73
+ #
37
74
  def find_in_batches(batch_options = {}, &block)
38
75
  return ::Enumerator.new(self, :find_in_batches, batch_options) unless block
39
76
  if params.key?('size') || params.key?('from')
@@ -73,6 +110,20 @@ module Elastictastic
73
110
  scoped({})
74
111
  end
75
112
 
113
+ def [](index_or_range)
114
+ case index_or_range
115
+ when ::Integer
116
+ from(index_or_range).size(1).to_a.first
117
+ when ::Range
118
+ range_size = index_or_range.last - index_or_range.first
119
+ range_size += 1 unless index_or_range.exclude_end?
120
+ from(index_or_range.first).
121
+ size(range_size)
122
+ else
123
+ raise ::ArgumentError, "Expected Integer or Range"
124
+ end
125
+ end
126
+
76
127
  def all_facets
77
128
  return @all_facets if defined? @all_facets
78
129
  populate_counts
@@ -84,10 +135,16 @@ module Elastictastic
84
135
  @index,
85
136
  @clazz,
86
137
  @search.merge(Search.new(params)),
87
- @parent_collection
138
+ @parent,
139
+ @routing
88
140
  )
89
141
  end
90
142
 
143
+ #
144
+ # Destroy all documents in this index.
145
+ #
146
+ # @note This will *not* take into account filters or queries in this scope.
147
+ #
91
148
  def destroy_all
92
149
  #FIXME support delete-by-query
93
150
  ::Elastictastic.client.delete(@index, @clazz.type)
@@ -98,13 +155,40 @@ module Elastictastic
98
155
  ::Elastictastic.client.put_mapping(index, type, @clazz.mapping)
99
156
  end
100
157
 
158
+ def exists?(id)
159
+ ::Elastictastic.client.
160
+ exists?(index, type, id, params_for_find.slice('routing'))
161
+ end
162
+
163
+ #
164
+ # Look up one or more documents by ID.
165
+ #
166
+ # Retrieve one or more Elastictastic documents by ID
167
+ #
168
+ # @overload find(*ids)
169
+ # Retrieve a single document or a collection of documents
170
+ #
171
+ # @param [String] ids Document IDs
172
+ # @return [Elastictastic::BasicDocument,Array] Collection of documents with the given IDs
173
+ #
174
+ # @overload find(id)
175
+ # Retrieve a single Elastictastic document
176
+ #
177
+ # @param [String] id ID of the document
178
+ # @return [Elastictastic::BasicDocument] The document with that ID, or nil if not found
179
+ #
180
+ # @overload find(ids)
181
+ # Retrieve a collection of Elastictastic documents by ID. This will
182
+ # return an Array even if the ids argument is a one-element Array.
183
+ #
184
+ # @param [Array] ids Document IDs
185
+ # @return [Array] Collection of documents with the given IDs
186
+ #
101
187
  def find(*ids)
102
188
  #TODO support combining this with other filters/query
103
189
  force_array = ::Array === ids.first
104
190
  ids = ids.flatten
105
- if ::Hash === ids.first
106
- find_many_in_many_indices(*ids)
107
- elsif ids.length == 1
191
+ if ids.length == 1
108
192
  instance = find_one(ids.first)
109
193
  force_array ? [instance] : instance
110
194
  else
@@ -128,6 +212,12 @@ module Elastictastic
128
212
  RUBY
129
213
  end
130
214
 
215
+ def routing(routing)
216
+ scope = scoped({})
217
+ scope.routing = routing
218
+ scope
219
+ end
220
+
131
221
  def method_missing(method, *args, &block)
132
222
  if ::Enumerable.method_defined?(method)
133
223
  each.__send__(method, *args, &block)
@@ -142,11 +232,76 @@ module Elastictastic
142
232
 
143
233
  def inspect
144
234
  inspected = "#{@clazz.name}:#{@index.name}"
145
- inspected << @search.params.to_json unless @search.params.empty?
235
+ inspected << ::Elastictastic.json_encode(@search.params) unless @search.params.empty?
146
236
  inspected
147
237
  end
148
238
 
239
+ #
240
+ # @private
241
+ #
242
+ def response=(response)
243
+ self.counts = response
244
+ @materialized_hits = materialize_hits(response['hits']['hits'])
245
+ end
246
+
247
+ #
248
+ # @private
249
+ #
250
+ def counts=(response)
251
+ @count ||= response['hits']['total']
252
+ if response['facets']
253
+ @all_facets ||= ::Hashie::Mash.new(response['facets'])
254
+ end
255
+ end
256
+
257
+ #
258
+ # @private
259
+ #
260
+ def find_one(id, params = {})
261
+ data = ::Elastictastic.client.
262
+ get(index, type, id, params_for_find_one.merge(params.stringify_keys))
263
+ return nil if data['exists'] == false
264
+ case data['status']
265
+ when nil
266
+ materialize_hit(data)
267
+ when 404
268
+ nil
269
+ end
270
+ end
271
+
272
+ def multi_get_params
273
+ {
274
+ '_type' => type,
275
+ '_index' => @index.name
276
+ }.tap do |params|
277
+ params['fields'] = ::Kernel.Array(@search['fields']) if @search['fields']
278
+ if @routing
279
+ params['routing'] = @routing
280
+ elsif @clazz.routing_required?
281
+ ::Kernel.raise ::Elastictastic::MissingParameter,
282
+ "Must specify routing parameter to look up #{@clazz.name} by ID"
283
+ end
284
+ end
285
+ end
286
+
287
+ def multi_search_headers
288
+ {'type' => type, 'index' => @index.name}.tap do |params|
289
+ params['routing'] = @routing if @routing
290
+ end
291
+ end
292
+
293
+ #
294
+ # @private
295
+ #
296
+ def materialize_hit(hit)
297
+ @clazz.new.tap do |result|
298
+ result.parent = @parent if @parent
299
+ result.elasticsearch_hit = hit
300
+ end
301
+ end
302
+
149
303
  protected
304
+ attr_writer :routing
150
305
 
151
306
  def search(search_params = {})
152
307
  ::Elastictastic.client.search(
@@ -160,9 +315,11 @@ module Elastictastic
160
315
  private
161
316
 
162
317
  def search_all
163
- response = search(:search_type => 'query_then_fetch')
164
- populate_counts(response)
165
- materialize_hits(response['hits']['hits'])
318
+ return @materialized_hits if defined? @materialized_hits
319
+ search_params = {:search_type => 'query_then_fetch'}
320
+ search_params[:routing] = @routing if @routing
321
+ self.response = search(search_params)
322
+ @materialized_hits
166
323
  end
167
324
 
168
325
  def search_in_batches(&block)
@@ -170,8 +327,10 @@ module Elastictastic
170
327
  scope_with_size = self.size(size)
171
328
  begin
172
329
  scope = scope_with_size.from(from)
173
- response = scope.search(:search_type => 'query_then_fetch')
174
- populate_counts(response)
330
+ params = {:search_type => 'query_then_fetch'}
331
+ params[:routing] = @routing if @routing
332
+ response = scope.search(params)
333
+ self.counts = response
175
334
  yield materialize_hits(response['hits']['hits'])
176
335
  from += size
177
336
  @count ||= scope.count
@@ -184,6 +343,7 @@ module Elastictastic
184
343
  :scroll => "#{batch_options[:ttl] || 60}s",
185
344
  :size => batch_options[:batch_size] || ::Elastictastic.config.default_batch_size
186
345
  }
346
+ scroll_options[:routing] = @routing if @routing
187
347
  scan_response = ::Elastictastic.client.search(
188
348
  @index,
189
349
  @clazz.type,
@@ -201,51 +361,22 @@ module Elastictastic
201
361
  end until response['hits']['hits'].empty?
202
362
  end
203
363
 
204
- def populate_counts(response = nil)
205
- response ||= search(:search_type => 'count')
206
- @count ||= response['hits']['total']
207
- if response['facets']
208
- @all_facets ||= ::Hashie::Mash.new(response['facets'])
209
- end
364
+ def populate_counts
365
+ params = {:search_type => 'count'}
366
+ params[:routing] = @routing if @routing
367
+ self.counts = search(params)
210
368
  end
211
369
 
212
- def find_one(id)
213
- data = ::Elastictastic.client.get(index, type, id, params_for_find_one)
214
- return nil if data['exists'] == false
215
- case data['status']
216
- when nil
217
- materialize_hit(data)
218
- when 404
219
- nil
220
- end
221
- end
222
-
223
- def find_many(ids)
370
+ def find_many(ids, params = {})
224
371
  docspec = ids.map do |id|
225
- { '_id' => id }.merge!(params_for_find_many)
372
+ { '_id' => id }.merge!(params_for_find_many).
373
+ merge!(params.stringify_keys)
226
374
  end
227
375
  materialize_hits(
228
376
  ::Elastictastic.client.mget(docspec, index, type)['docs']
229
377
  ).map { |result, hit| result }
230
378
  end
231
379
 
232
- def find_many_in_many_indices(ids_by_index)
233
- docs = []
234
- ids_by_index.each_pair do |index, ids|
235
- ::Kernel.Array(ids).each do |id|
236
- docs << doc = {
237
- '_id' => id.to_s,
238
- '_type' => type,
239
- '_index' => index
240
- }
241
- doc['fields'] = ::Kernel.Array(@search['fields']) if @search['fields']
242
- end
243
- end
244
- materialize_hits(
245
- ::Elastictastic.client.mget(docs)['docs']
246
- ).map { |result, hit| result }
247
- end
248
-
249
380
  def params_for_find_one
250
381
  params_for_find.tap do |params|
251
382
  params['fields'] &&= params['fields'].join(',')
@@ -259,6 +390,12 @@ module Elastictastic
259
390
  def params_for_find
260
391
  {}.tap do |params|
261
392
  params['fields'] = ::Kernel.Array(@search['fields']) if @search['fields']
393
+ if @routing
394
+ params['routing'] = @routing
395
+ elsif @clazz.routing_required?
396
+ ::Kernel.raise ::Elastictastic::MissingParameter,
397
+ "Must specify routing parameter to look up #{@clazz.name} by ID"
398
+ end
262
399
  end
263
400
  end
264
401
 
@@ -272,12 +409,5 @@ module Elastictastic
272
409
  end
273
410
  end
274
411
  end
275
-
276
- def materialize_hit(hit)
277
- @clazz.new.tap do |result|
278
- result.parent_collection = @parent_collection if @parent_collection
279
- result.elasticsearch_hit = hit
280
- end
281
- end
282
412
  end
283
413
  end
@@ -1,6 +1,9 @@
1
1
  module Elastictastic
2
2
  module ServerError
3
- class ServerError < StandardError
3
+ ERROR_PATTERN = /^([A-Z][A-Za-z]*)(?::\s*)?(.*)$/
4
+ NESTED_PATTERN = /^.*nested:\s+(.*)$/
5
+
6
+ class ServerError < Elastictastic::Error
4
7
  attr_accessor :status
5
8
  end
6
9
 
@@ -10,6 +13,22 @@ module Elastictastic
10
13
  const_set(name, error)
11
14
  end
12
15
  end
16
+
17
+ def [](server_message, status = nil)
18
+ match = ERROR_PATTERN.match(server_message)
19
+ if match
20
+ if (nested_match = NESTED_PATTERN.match(match[2]))
21
+ return self[nested_match[1], status]
22
+ else
23
+ clazz = Elastictastic::ServerError.const_get(match[1])
24
+ error = clazz.new(match[2])
25
+ error.status = status
26
+ error
27
+ end
28
+ else
29
+ Elastictastic::ServerError::ServerError.new(server_message)
30
+ end
31
+ end
13
32
  end
14
33
  end
15
34
  end