elastomer-client 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 20de9fda77111f1c5d50ccbbb63dd8b3316f7149
4
- data.tar.gz: 97b970b60153c6069c44d866b00c08ba0542ae24
3
+ metadata.gz: 067c35156016c954c5bffa66b9f1eba92f977410
4
+ data.tar.gz: a49152e69e1b33ac13f6fef6404cb219a35448e6
5
5
  SHA512:
6
- metadata.gz: 25dd4d57d43c60e279eaec42708d0ba8f12ad3d085baa64d9b68c489e31f5f152e54b5663fad14afe1c459245e7ca9237082fe41d834a8126be3c09f3d311899
7
- data.tar.gz: 751eeeb151a1e582903b84f6383de2f7ff47f39db86afeb582fed185775b6881b915ddaf80c3326930f988bf21d558eb3f66dae82f26b018d4b9c641b64d6b50
6
+ metadata.gz: 09e1f8f14d809b1c475216620ae2fffc7dd2aa712eead86811dcfd81288078cd25d9507ee66453729a7fe82eba3296525395587044d9bbb9c605e656eae4869d
7
+ data.tar.gz: 876a476bf1d9cbdb46c4524dad8bc097813eec062d6757e7be046777a162d097f696240a14de83a256ab93d16ab8ec3f53c427ebb68242ee9b9b35ef008ceb96
@@ -1,3 +1,7 @@
1
+ ## 0.7.0 (2015-09-18)
2
+ - Add streaming bulk functionality via `bulk_stream_items`
3
+ - Make Delete by Query compatible with Elasticsearch 2.0
4
+
1
5
  ## 0.6.0 (2015-09-11)
2
6
  - Support all URL parameters when using `Client#scroll`
3
7
  - BREAKING: Moved some `Scroller` reader methods into `Scroller.opts`
@@ -27,5 +27,6 @@ Gem::Specification.new do |spec|
27
27
  spec.add_development_dependency "bundler", "~> 1.5"
28
28
  spec.add_development_dependency "activesupport", ">= 3.0"
29
29
  spec.add_development_dependency "minitest","~> 4.7"
30
+ spec.add_development_dependency "webmock","~> 1.21"
30
31
  spec.add_development_dependency "rake"
31
32
  end
@@ -43,6 +43,103 @@ module Elastomer
43
43
  end
44
44
  end
45
45
 
46
+ # Stream bulk actions from an Enumerator.
47
+ #
48
+ # Examples
49
+ #
50
+ # ops = [
51
+ # [:index, document1, {:_type => "foo", :_id => 1}],
52
+ # [:create, document2],
53
+ # [:delete, {:_type => "bar", :_id => 42}]
54
+ # ]
55
+ # bulk_stream_responses(ops, :index => 'default-index').each do |response|
56
+ # puts response
57
+ # end
58
+ #
59
+ # Returns an Enumerator of responses.
60
+ def bulk_stream_responses(ops, params = {})
61
+ bulk_obj = Bulk.new(self, params)
62
+
63
+ Enumerator.new do |yielder|
64
+ ops.each do |action, *args|
65
+ response = bulk_obj.send(action, *args)
66
+ yielder.yield response unless response.nil?
67
+ end
68
+
69
+ response = bulk_obj.call
70
+ yielder.yield response unless response.nil?
71
+ end
72
+ end
73
+
74
+ # Internal: Determine whether or not a response item has an HTTP status code
75
+ # in the range 200 to 299.
76
+ #
77
+ # item - The bulk response item
78
+ #
79
+ # Returns a boolean
80
+ def is_ok?(item)
81
+ item.values.first["status"].between?(200, 299)
82
+ end
83
+
84
+ # Stream bulk actions from an Enumerator and pass the response items to
85
+ # the given block.
86
+ #
87
+ # Examples
88
+ #
89
+ # ops = [
90
+ # [:index, document1, {:_type => "foo", :_id => 1}],
91
+ # [:create, document2],
92
+ # [:delete, {:_type => "bar", :_id => 42}]
93
+ # ]
94
+ # bulk_stream_items(ops, :index => 'default-index') do |item|
95
+ # puts item
96
+ # end
97
+ #
98
+ # # return value:
99
+ # # {
100
+ # # "took" => 256,
101
+ # # "errors" => false,
102
+ # # "success" => 3,
103
+ # # "failure" => 0
104
+ # # }
105
+ #
106
+ # # sample response item:
107
+ # # {
108
+ # # "delete": {
109
+ # # "_index": "foo",
110
+ # # "_type": "bar",
111
+ # # "_id": "42",
112
+ # # "_version": 3,
113
+ # # "status": 200,
114
+ # # "found": true
115
+ # # }
116
+ # # }
117
+ #
118
+ # Returns a Hash of stats about items from the responses.
119
+ def bulk_stream_items(ops, params = {})
120
+ stats = {
121
+ "took" => 0,
122
+ "errors" => false,
123
+ "success" => 0,
124
+ "failure" => 0
125
+ }
126
+
127
+ bulk_stream_responses(ops, params).each do |response|
128
+ stats["took"] += response["took"]
129
+ stats["errors"] |= response["errors"]
130
+
131
+ response["items"].each do |item|
132
+ if is_ok?(item)
133
+ stats["success"] += 1
134
+ else
135
+ stats["failure"] += 1
136
+ end
137
+ yield item
138
+ end
139
+ end
140
+
141
+ stats
142
+ end
46
143
 
47
144
  # The Bulk class provides some abstractions and helper methods for working
48
145
  # with the ElasticSearch bulk API command. Instances of the Bulk class
@@ -156,7 +253,7 @@ module Elastomer
156
253
  end
157
254
 
158
255
  # Add an update action to the list of bulk actions to be performed when
159
- # the bulk API call is made. Parameters can be provided in the parameters
256
+ # the bulk API call is made. Parameters can be provided in the parameters
160
257
  # hash (underscore prefix optional) or in the document hash (underscore
161
258
  # prefix required).
162
259
  #
@@ -178,7 +275,7 @@ module Elastomer
178
275
  # the bulk API call is made.
179
276
  #
180
277
  # params - Parameters for the delete action (as a Hash)
181
- #
278
+ #
182
279
  # Examples
183
280
  # delete(:_id => 1, :_type => 'foo')
184
281
  #
@@ -0,0 +1,120 @@
1
+ module Elastomer
2
+ class Client
3
+
4
+ # Delete documents from one or more indices and one or more types based
5
+ # on a query.
6
+ #
7
+ # The return value follows the format returned by the Elasticsearch Delete
8
+ # by Query plugin: https://github.com/elastic/elasticsearch/blob/master/docs/plugins/delete-by-query.asciidoc#response-body
9
+ #
10
+ # Internally, this method uses a combination of scroll and bulk delete
11
+ # instead of the Delete by Query API, which was removed in Elasticsearch
12
+ # 2.0.
13
+ #
14
+ # query - The query body as a Hash
15
+ # params - Parameters Hash
16
+ #
17
+ # Examples
18
+ #
19
+ # # request body query
20
+ # delete_by_query({:query => {:match_all => {}}}, :type => 'tweet')
21
+ #
22
+ # # same thing but using the URI request method
23
+ # delete_by_query(nil, { :q => '*:*', :type => 'tweet' })
24
+ #
25
+ # See http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-delete-by-query.html
26
+ #
27
+ # Returns a Hash of statistics about the delete operations, for example:
28
+ #
29
+ # {
30
+ # "took" : 639,
31
+ # "_indices" : {
32
+ # "_all" : {
33
+ # "found" : 5901,
34
+ # "deleted" : 5901,
35
+ # "missing" : 0,
36
+ # "failed" : 0
37
+ # },
38
+ # "twitter" : {
39
+ # "found" : 5901,
40
+ # "deleted" : 5901,
41
+ # "missing" : 0,
42
+ # "failed" : 0
43
+ # }
44
+ # },
45
+ # "failures" : [ ]
46
+ # }
47
+ def delete_by_query(query, params = {})
48
+ DeleteByQuery.new(self, query, params).execute
49
+ end
50
+
51
+ class DeleteByQuery
52
+
53
+ # Create a new DeleteByQuery command for deleting documents matching a
54
+ # query
55
+ #
56
+ # client - Elastomer::Client used for HTTP requests to the server
57
+ # query - The query used to find documents to delete
58
+ # params - Other URL parameters
59
+ def initialize(client, query, params = {})
60
+ @client = client
61
+ @query = query
62
+ @params = params
63
+ @response_stats = { 'took' => 0, '_indices' => { '_all' => {} }, 'failures' => [] }
64
+ end
65
+
66
+ attr_reader :client, :query, :params, :response_stats
67
+
68
+ # Internal: Determine whether or not an HTTP status code is in the range
69
+ # 200 to 299
70
+ #
71
+ # status - HTTP status code
72
+ #
73
+ # Returns a boolean
74
+ def is_ok?(status)
75
+ status.between?(200, 299)
76
+ end
77
+
78
+ # Internal: Tally the contributions of an item to the found, deleted,
79
+ # missing, and failed counts for the summary statistics
80
+ #
81
+ # item - An element of the items array from a bulk response
82
+ #
83
+ # Returns a Hash of counts for each category
84
+ def categorize(item)
85
+ {
86
+ "found" => item["found"] || item["status"] == 409 ? 1 : 0,
87
+ "deleted" => is_ok?(item["status"]) ? 1 : 0,
88
+ "missing" => !item["found"] && !item.key?("error") ? 1 : 0,
89
+ "failed" => item.key?("error") ? 1 : 0,
90
+ }
91
+ end
92
+
93
+ # Internal: Combine a response item with the existing statistics
94
+ #
95
+ # item - A bulk response item
96
+ def accumulate(item)
97
+ item = item["delete"]
98
+ (@response_stats['_indices'][item['_index']] ||= {}).merge!(categorize(item)) { |_, n, m| n + m }
99
+ @response_stats['_indices']['_all'].merge!(categorize(item)) { |_, n, m| n + m }
100
+ @response_stats['failures'] << item unless is_ok? item['status']
101
+ end
102
+
103
+ # Perform the Delete by Query action
104
+ #
105
+ # Returns a Hash of statistics about the bulk operation
106
+ def execute
107
+ ops = Enumerator.new do |yielder|
108
+ @client.scan(@query, @params).each_document do |hit|
109
+ yielder.yield([:delete, { _id: hit["_id"], _type: hit["_type"], _index: hit["_index"] }])
110
+ end
111
+ end
112
+
113
+ stats = @client.bulk_stream_items(ops, @params) { |item| accumulate(item) }
114
+ @response_stats['took'] = stats['took']
115
+ @response_stats
116
+ end
117
+
118
+ end # DeleteByQuery
119
+ end # Client
120
+ end # Elastomer
@@ -252,25 +252,13 @@ module Elastomer
252
252
  # hash must contain the :query key. Otherwise we assume a URI request is
253
253
  # being made.
254
254
  #
255
- # query - The query body as a Hash
256
- # params - Parameters Hash
257
- #
258
- # Examples
259
- #
260
- # # request body query
261
- # delete_by_query({:query => {:match_all => {}}}, :type => 'tweet')
255
+ # See Client#delete_by_query for more information.
262
256
  #
263
- # # same thing but using the URI request method
264
- # delete_by_query(:q => '*:*', :type => 'tweet')
265
- #
266
- # See http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-delete-by-query.html
267
- #
268
- # Returns the response body as a hash
269
- def delete_by_query( query, params = nil )
257
+ # Returns a Hash of statistics about the delete operations
258
+ def delete_by_query(query, params = nil)
270
259
  query, params = extract_params(query) if params.nil?
271
260
 
272
- response = client.delete '/{index}{/type}/_query', update_params(params, :body => query, :action => 'docs.delete_by_query')
273
- response.body
261
+ @client.delete_by_query(query, update_params(params))
274
262
  end
275
263
 
276
264
  # Returns information and statistics on terms in the fields of a
@@ -523,6 +523,16 @@ module Elastomer
523
523
  client.warmer(name, warmer_name)
524
524
  end
525
525
 
526
+ # Delete documents from one or more indices and one or more types based
527
+ # on a query.
528
+ #
529
+ # See Client#delete_by_query for more information.
530
+ #
531
+ # Returns a Hash of statistics about the delete operations
532
+ def delete_by_query(query, params = nil)
533
+ docs.delete_by_query(query, params)
534
+ end
535
+
526
536
  # Internal: Add default parameters to the `params` Hash and then apply
527
537
  # `overrides` to the params if any are given.
528
538
  #
@@ -1,5 +1,5 @@
1
1
  module Elastomer
2
- VERSION = '0.6.0'
2
+ VERSION = '0.7.0'
3
3
 
4
4
  def self.version
5
5
  VERSION
@@ -293,4 +293,31 @@ describe Elastomer::Client::Bulk do
293
293
  assert_equal 'tweet', items[0]['index']['_type']
294
294
  assert_equal 1, items[0]['index']['_version']
295
295
  end
296
+
297
+ it 'streams bulk responses' do
298
+ ops = [
299
+ [:index, { :message => 'tweet 1' }, { :_id => 1, :_type => 'book', :_index => @index.name }],
300
+ [:index, { :message => 'tweet 2' }, { :_id => 2, :_type => 'book', :_index => @index.name }],
301
+ [:index, { :message => 'tweet 3' }, { :_id => 3, :_type => 'book', :_index => @index.name }]
302
+ ]
303
+ responses = $client.bulk_stream_responses(ops, { :action_count => 2 }).to_a
304
+ assert_equal(2, responses.length)
305
+ assert_bulk_index(responses[0]["items"][0])
306
+ assert_bulk_index(responses[0]["items"][1])
307
+ assert_bulk_index(responses[1]["items"][0])
308
+ end
309
+
310
+ it 'streams bulk items' do
311
+ ops = [
312
+ [:index, { :message => 'tweet 1' }, { :_id => 1, :_type => 'book', :_index => @index.name }],
313
+ [:index, { :message => 'tweet 2' }, { :_id => 2, :_type => 'book', :_index => @index.name }],
314
+ [:index, { :message => 'tweet 3' }, { :_id => 3, :_type => 'book', :_index => @index.name }]
315
+ ]
316
+ items = []
317
+ stats = $client.bulk_stream_items(ops, { :action_count => 2 }) { |item| items << item }
318
+ assert_equal(3, items.length)
319
+ assert_bulk_index(items[0])
320
+ assert_bulk_index(items[1])
321
+ assert_bulk_index(items[2])
322
+ end
296
323
  end
@@ -0,0 +1,151 @@
1
+ require File.expand_path('../../test_helper', __FILE__)
2
+
3
+ describe Elastomer::Client::DeleteByQuery do
4
+
5
+ before do
6
+ @index = $client.index "elastomer-delete-by-query-test"
7
+ @index.delete if @index.exists?
8
+ @docs = @index.docs("docs")
9
+ end
10
+
11
+ after do
12
+ @index.delete if @index.exists?
13
+ end
14
+
15
+ describe "when an index with documents exists" do
16
+ before do
17
+ @index.create(nil)
18
+ wait_for_index(@index_name)
19
+ end
20
+
21
+ it 'deletes by query' do
22
+ @docs.index({ :_id => 0, :name => "mittens" })
23
+ @docs.index({ :_id => 1, :name => "luna" })
24
+
25
+ @index.refresh
26
+ response = $client.delete_by_query(nil, :q => "name:mittens")
27
+ assert_equal({
28
+ '_all' => {
29
+ 'found' => 1,
30
+ 'deleted' => 1,
31
+ 'missing' => 0,
32
+ 'failed' => 0,
33
+ },
34
+ @index.name => {
35
+ 'found' => 1,
36
+ 'deleted' => 1,
37
+ 'missing' => 0,
38
+ 'failed' => 0,
39
+ },
40
+ }, response['_indices'])
41
+
42
+ @index.refresh
43
+ response = @docs.multi_get :ids => [0, 1]
44
+ refute_found response['docs'][0]
45
+ assert_found response['docs'][1]
46
+ end
47
+
48
+ it 'respects action_count' do
49
+ @docs.index({ :_id => 0, :name => "mittens" })
50
+ @docs.index({ :_id => 1, :name => "luna" })
51
+ @index.refresh
52
+
53
+ response = $client.delete_by_query(nil, :action_count => 1)
54
+
55
+ assert_requested(:post, /_bulk/, :times => 2)
56
+
57
+ assert_equal({
58
+ '_all' => {
59
+ 'found' => 2,
60
+ 'deleted' => 2,
61
+ 'missing' => 0,
62
+ 'failed' => 0,
63
+ },
64
+ @index.name => {
65
+ 'found' => 2,
66
+ 'deleted' => 2,
67
+ 'missing' => 0,
68
+ 'failed' => 0,
69
+ },
70
+ }, response['_indices'])
71
+
72
+ @index.refresh
73
+ response = @docs.multi_get :ids => [0, 1]
74
+ refute_found response['docs'][0]
75
+ refute_found response['docs'][1]
76
+ end
77
+
78
+ it 'counts missing documents' do
79
+ @docs.index({ :_id => 0 })
80
+
81
+ stub_request(:post, /_bulk/).
82
+ to_return(lambda do |request|
83
+ {
84
+ :body => MultiJson.dump({
85
+ "took" => 0,
86
+ "errors" => false,
87
+ "items" => [{
88
+ "delete" => {
89
+ "_index" => @index.name,
90
+ "_type" => @docs.name,
91
+ "_id" => 0,
92
+ "_version" => 1,
93
+ "status" => 404,
94
+ "found" => false } }] }) }
95
+ end)
96
+
97
+ @index.refresh
98
+ response = $client.delete_by_query(nil, :action_count => 1)
99
+ assert_equal({
100
+ '_all' => {
101
+ 'found' => 0,
102
+ 'deleted' => 0,
103
+ 'missing' => 1,
104
+ 'failed' => 0,
105
+ },
106
+ @index.name => {
107
+ 'found' => 0,
108
+ 'deleted' => 0,
109
+ 'missing' => 1,
110
+ 'failed' => 0,
111
+ },
112
+ }, response['_indices'])
113
+ end
114
+
115
+ it 'counts failed operations' do
116
+ @docs.index({ :_id => 0 })
117
+
118
+ stub_request(:post, /_bulk/).
119
+ to_return(lambda do |request|
120
+ {
121
+ :body => MultiJson.dump({
122
+ "took" => 0,
123
+ "errors" => false,
124
+ "items" => [{
125
+ "delete" => {
126
+ "_index" => @index.name,
127
+ "_type" => @docs.name,
128
+ "_id" => 0,
129
+ "status" => 409,
130
+ "error" => "VersionConflictEngineException" } }] }) }
131
+ end)
132
+
133
+ @index.refresh
134
+ response = $client.delete_by_query(nil, :action_count => 1)
135
+ assert_equal({
136
+ '_all' => {
137
+ 'found' => 1,
138
+ 'deleted' => 0,
139
+ 'missing' => 0,
140
+ 'failed' => 1,
141
+ },
142
+ @index.name => {
143
+ 'found' => 1,
144
+ 'deleted' => 0,
145
+ 'missing' => 0,
146
+ 'failed' => 1,
147
+ },
148
+ }, response['_indices'])
149
+ end
150
+ end
151
+ end
@@ -214,6 +214,20 @@ describe Elastomer::Client::Docs do
214
214
  assert_equal %w[pea53 grantr], authors
215
215
 
216
216
  h = @docs.delete_by_query(:q => "author:grantr")
217
+ assert_equal(h['_indices'], {
218
+ '_all' => {
219
+ 'found' => 1,
220
+ 'deleted' => 1,
221
+ 'missing' => 0,
222
+ 'failed' => 0,
223
+ },
224
+ @name => {
225
+ 'found' => 1,
226
+ 'deleted' => 1,
227
+ 'missing' => 0,
228
+ 'failed' => 0,
229
+ },
230
+ })
217
231
  @index.refresh
218
232
  h = @docs.multi_get :ids => [1, 2]
219
233
  assert_found h['docs'][0]
@@ -362,5 +362,25 @@ describe Elastomer::Client::Index do
362
362
  response = @index.segments
363
363
  assert_includes response["indices"], "elastomer-index-test"
364
364
  end
365
+
366
+ it 'deletes by query' do
367
+ @index.docs('foo').index("foo" => "bar")
368
+ @index.refresh
369
+ r = @index.delete_by_query(:q => '*')
370
+ assert_equal({
371
+ '_all' => {
372
+ 'found' => 1,
373
+ 'deleted' => 1,
374
+ 'missing' => 0,
375
+ 'failed' => 0,
376
+ },
377
+ @name => {
378
+ 'found' => 1,
379
+ 'deleted' => 1,
380
+ 'missing' => 0,
381
+ 'failed' => 0,
382
+ }
383
+ }, r['_indices'])
384
+ end
365
385
  end
366
386
  end
@@ -1,3 +1,6 @@
1
+ require 'webmock/minitest'
2
+ WebMock.allow_net_connect!
3
+
1
4
  require 'securerandom'
2
5
  require 'rubygems' unless defined? Gem
3
6
  require 'bundler'
@@ -24,7 +27,7 @@ $client_params = {
24
27
  :port => ENV['BOXEN_ELASTICSEARCH_PORT'] || 9200,
25
28
  :read_timeout => 2,
26
29
  :open_timeout => 1,
27
- :opaque_id => true
30
+ :opaque_id => false
28
31
  }
29
32
  $client = Elastomer::Client.new $client_params
30
33
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elastomer-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Pease
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-09-11 00:00:00.000000000 Z
12
+ date: 2015-10-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: addressable
@@ -109,6 +109,20 @@ dependencies:
109
109
  - - "~>"
110
110
  - !ruby/object:Gem::Version
111
111
  version: '4.7'
112
+ - !ruby/object:Gem::Dependency
113
+ name: webmock
114
+ requirement: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - "~>"
117
+ - !ruby/object:Gem::Version
118
+ version: '1.21'
119
+ type: :development
120
+ prerelease: false
121
+ version_requirements: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - "~>"
124
+ - !ruby/object:Gem::Version
125
+ version: '1.21'
112
126
  - !ruby/object:Gem::Dependency
113
127
  name: rake
114
128
  requirement: !ruby/object:Gem::Requirement
@@ -156,6 +170,7 @@ files:
156
170
  - lib/elastomer/client.rb
157
171
  - lib/elastomer/client/bulk.rb
158
172
  - lib/elastomer/client/cluster.rb
173
+ - lib/elastomer/client/delete_by_query.rb
159
174
  - lib/elastomer/client/docs.rb
160
175
  - lib/elastomer/client/errors.rb
161
176
  - lib/elastomer/client/index.rb
@@ -179,6 +194,7 @@ files:
179
194
  - test/assertions.rb
180
195
  - test/client/bulk_test.rb
181
196
  - test/client/cluster_test.rb
197
+ - test/client/delete_by_query_test.rb
182
198
  - test/client/docs_test.rb
183
199
  - test/client/errors_test.rb
184
200
  - test/client/index_test.rb
@@ -225,6 +241,7 @@ test_files:
225
241
  - test/assertions.rb
226
242
  - test/client/bulk_test.rb
227
243
  - test/client/cluster_test.rb
244
+ - test/client/delete_by_query_test.rb
228
245
  - test/client/docs_test.rb
229
246
  - test/client/errors_test.rb
230
247
  - test/client/index_test.rb