elastomer-client 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 20de9fda77111f1c5d50ccbbb63dd8b3316f7149
4
- data.tar.gz: 97b970b60153c6069c44d866b00c08ba0542ae24
3
+ metadata.gz: 067c35156016c954c5bffa66b9f1eba92f977410
4
+ data.tar.gz: a49152e69e1b33ac13f6fef6404cb219a35448e6
5
5
  SHA512:
6
- metadata.gz: 25dd4d57d43c60e279eaec42708d0ba8f12ad3d085baa64d9b68c489e31f5f152e54b5663fad14afe1c459245e7ca9237082fe41d834a8126be3c09f3d311899
7
- data.tar.gz: 751eeeb151a1e582903b84f6383de2f7ff47f39db86afeb582fed185775b6881b915ddaf80c3326930f988bf21d558eb3f66dae82f26b018d4b9c641b64d6b50
6
+ metadata.gz: 09e1f8f14d809b1c475216620ae2fffc7dd2aa712eead86811dcfd81288078cd25d9507ee66453729a7fe82eba3296525395587044d9bbb9c605e656eae4869d
7
+ data.tar.gz: 876a476bf1d9cbdb46c4524dad8bc097813eec062d6757e7be046777a162d097f696240a14de83a256ab93d16ab8ec3f53c427ebb68242ee9b9b35ef008ceb96
@@ -1,3 +1,7 @@
1
+ ## 0.7.0 (2015-09-18)
2
+ - Add streaming bulk functionality via `bulk_stream_items`
3
+ - Make Delete by Query compatible with Elasticsearch 2.0
4
+
1
5
  ## 0.6.0 (2015-09-11)
2
6
  - Support all URL parameters when using `Client#scroll`
3
7
  - BREAKING: Moved some `Scroller` reader methods into `Scroller.opts`
@@ -27,5 +27,6 @@ Gem::Specification.new do |spec|
27
27
  spec.add_development_dependency "bundler", "~> 1.5"
28
28
  spec.add_development_dependency "activesupport", ">= 3.0"
29
29
  spec.add_development_dependency "minitest","~> 4.7"
30
+ spec.add_development_dependency "webmock","~> 1.21"
30
31
  spec.add_development_dependency "rake"
31
32
  end
@@ -43,6 +43,103 @@ module Elastomer
43
43
  end
44
44
  end
45
45
 
46
# Stream bulk actions from an Enumerator.
#
# ops    - An Enumerable of bulk operations. Each element is an Array of the
#          form [action, *args]: `action` names a public method of the Bulk
#          object (:index, :create, :delete, ...) and `args` are handed to
#          that method unchanged.
# params - Parameters Hash passed to Bulk.new
#
# Examples
#
#   ops = [
#     [:index, document1, {:_type => "foo", :_id => 1}],
#     [:create, document2],
#     [:delete, {:_type => "bar", :_id => 42}]
#   ]
#   bulk_stream_responses(ops, :index => 'default-index').each do |response|
#     puts response
#   end
#
# Returns an Enumerator of responses. Nothing is dispatched until the
# Enumerator is iterated.
def bulk_stream_responses(ops, params = {})
  bulk_obj = Bulk.new(self, params)

  Enumerator.new do |yielder|
    ops.each do |action, *args|
      # public_send (rather than send) keeps a caller-supplied action name
      # from reaching private methods such as Kernel#system or Kernel#exit.
      response = bulk_obj.public_send(action, *args)
      yielder.yield response unless response.nil?
    end

    # Final call once the operations are exhausted; presumably this sends
    # whatever is still buffered (Bulk is not shown here). nil is not yielded.
    response = bulk_obj.call
    yielder.yield response unless response.nil?
  end
end
73
+
74
# Internal: Determine whether or not a response item has an HTTP status code
# in the range 200 to 299.
#
# item - The bulk response item: a Hash with a single action key whose value
#        is a Hash carrying a "status" entry
#
# Returns a boolean. An item without an action Hash or without a "status"
# is reported as not ok instead of raising.
def is_ok?(item)
  result = item.values.first
  status = result && result["status"]
  # Range#cover? answers false for nil, so a missing status is a failure.
  (200..299).cover?(status)
end
83
+
84
# Stream bulk actions from an Enumerator and pass the response items to the
# given block.
#
# ops    - An Enumerable of bulk operations (see bulk_stream_responses)
# params - Parameters Hash forwarded to bulk_stream_responses
# block  - Optional; called once with every item of every bulk response
#
# Examples
#
#   ops = [
#     [:index, document1, {:_type => "foo", :_id => 1}],
#     [:create, document2],
#     [:delete, {:_type => "bar", :_id => 42}]
#   ]
#   bulk_stream_items(ops, :index => 'default-index') do |item|
#     puts item
#   end
#
#   # return value:
#   # {
#   #   "took" => 256,
#   #   "errors" => false,
#   #   "success" => 3,
#   #   "failure" => 0
#   # }
#
#   # sample response item:
#   # {
#   #   "delete": {
#   #     "_index": "foo",
#   #     "_type": "bar",
#   #     "_id": "42",
#   #     "_version": 3,
#   #     "status": 200,
#   #     "found": true
#   #   }
#   # }
#
# Returns a Hash of stats about items from the responses.
def bulk_stream_items(ops, params = {})
  stats = {
    "took" => 0,
    "errors" => false,
    "success" => 0,
    "failure" => 0
  }

  bulk_stream_responses(ops, params).each do |response|
    stats["took"] += response["took"]
    stats["errors"] |= response["errors"]

    response["items"].each do |item|
      stats[is_ok?(item) ? "success" : "failure"] += 1
      # The block is optional: without the guard a block-less call raised
      # LocalJumpError only after the first bulk request had been sent.
      yield item if block_given?
    end
  end

  stats
end
46
143
 
47
144
  # The Bulk class provides some abstractions and helper methods for working
48
145
  # with the ElasticSearch bulk API command. Instances of the Bulk class
@@ -156,7 +253,7 @@ module Elastomer
156
253
  end
157
254
 
158
255
  # Add an update action to the list of bulk actions to be performed when
159
- # the bulk API call is made. Parameters can be provided in the parameters
256
+ # the bulk API call is made. Parameters can be provided in the parameters
160
257
  # hash (underscore prefix optional) or in the document hash (underscore
161
258
  # prefix required).
162
259
  #
@@ -178,7 +275,7 @@ module Elastomer
178
275
  # the bulk API call is made.
179
276
  #
180
277
  # params - Parameters for the delete action (as a Hash)
181
- #
278
+ #
182
279
  # Examples
183
280
  # delete(:_id => 1, :_type => 'foo')
184
281
  #
@@ -0,0 +1,120 @@
1
module Elastomer
  class Client

    # Delete documents from one or more indices and one or more types based
    # on a query.
    #
    # The return value follows the format returned by the Elasticsearch Delete
    # by Query plugin: https://github.com/elastic/elasticsearch/blob/master/docs/plugins/delete-by-query.asciidoc#response-body
    #
    # Internally, this method uses a combination of scroll and bulk delete
    # instead of the Delete by Query API, which was removed in Elasticsearch
    # 2.0.
    #
    # query  - The query body as a Hash
    # params - Parameters Hash
    #
    # Examples
    #
    #   # request body query
    #   delete_by_query({:query => {:match_all => {}}}, :type => 'tweet')
    #
    #   # same thing but using the URI request method
    #   delete_by_query(nil, { :q => '*:*', :type => 'tweet' })
    #
    # See http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-delete-by-query.html
    #
    # Returns a Hash of statistics about the delete operations, for example:
    #
    #   {
    #     "took" : 639,
    #     "_indices" : {
    #       "_all" : {
    #         "found" : 5901,
    #         "deleted" : 5901,
    #         "missing" : 0,
    #         "failed" : 0
    #       },
    #       "twitter" : {
    #         "found" : 5901,
    #         "deleted" : 5901,
    #         "missing" : 0,
    #         "failed" : 0
    #       }
    #     },
    #     "failures" : [ ]
    #   }
    def delete_by_query(query, params = {})
      DeleteByQuery.new(self, query, params).execute
    end

    class DeleteByQuery

      # Create a new DeleteByQuery command for deleting documents matching a
      # query
      #
      # client - Elastomer::Client used for HTTP requests to the server
      # query  - The query used to find documents to delete
      # params - Other URL parameters
      def initialize(client, query, params = {})
        @client = client
        @query = query
        @params = params
        # "_all" starts with explicit zero counters so the result has the
        # documented shape even when the query matches no documents.
        @response_stats = {
          'took'     => 0,
          '_indices' => {
            '_all' => { 'found' => 0, 'deleted' => 0, 'missing' => 0, 'failed' => 0 }
          },
          'failures' => []
        }
      end

      attr_reader :client, :query, :params, :response_stats

      # Internal: Determine whether or not an HTTP status code is in the range
      # 200 to 299
      #
      # status - HTTP status code
      #
      # Returns a boolean
      def is_ok?(status)
        status.between?(200, 299)
      end

      # Internal: Tally the contributions of an item to the found, deleted,
      # missing, and failed counts for the summary statistics
      #
      # item - An element of the items array from a bulk response
      #
      # Returns a Hash of counts for each category
      def categorize(item)
        {
          # a 409 status is also tallied as found
          "found"   => item["found"] || item["status"] == 409 ? 1 : 0,
          "deleted" => is_ok?(item["status"]) ? 1 : 0,
          "missing" => !item["found"] && !item.key?("error") ? 1 : 0,
          "failed"  => item.key?("error") ? 1 : 0,
        }
      end

      # Internal: Combine a response item with the existing statistics
      #
      # item - A bulk response item
      def accumulate(item)
        item = item["delete"]
        counts = categorize(item)
        indices = @response_stats['_indices']

        # Add the item's counts to its own index and to the "_all" summary.
        [item['_index'], '_all'].each do |name|
          (indices[name] ||= {}).merge!(counts) { |_, n, m| n + m }
        end

        @response_stats['failures'] << item unless is_ok?(item['status'])
      end

      # Perform the Delete by Query action
      #
      # Documents are found with a scan query and a delete operation for each
      # hit is streamed through the bulk API. The same params Hash is handed
      # to both the scan and the bulk call.
      #
      # Returns a Hash of statistics about the bulk operation
      def execute
        ops = Enumerator.new do |yielder|
          @client.scan(@query, @params).each_document do |hit|
            yielder.yield([:delete, { _id: hit["_id"], _type: hit["_type"], _index: hit["_index"] }])
          end
        end

        stats = @client.bulk_stream_items(ops, @params) { |item| accumulate(item) }
        @response_stats['took'] = stats['took']
        @response_stats
      end

    end  # DeleteByQuery
  end  # Client
end  # Elastomer
@@ -252,25 +252,13 @@ module Elastomer
252
252
  # hash must contain the :query key. Otherwise we assume a URI request is
253
253
  # being made.
254
254
  #
255
- # query - The query body as a Hash
256
- # params - Parameters Hash
257
- #
258
- # Examples
259
- #
260
- # # request body query
261
- # delete_by_query({:query => {:match_all => {}}}, :type => 'tweet')
255
+ # See Client#delete_by_query for more information.
262
256
  #
263
- # # same thing but using the URI request method
264
- # delete_by_query(:q => '*:*', :type => 'tweet')
265
- #
266
- # See http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-delete-by-query.html
267
- #
268
- # Returns the response body as a hash
269
- def delete_by_query( query, params = nil )
257
+ # Returns a Hash of statistics about the delete operations
258
+ def delete_by_query(query, params = nil)
270
259
  query, params = extract_params(query) if params.nil?
271
260
 
272
- response = client.delete '/{index}{/type}/_query', update_params(params, :body => query, :action => 'docs.delete_by_query')
273
- response.body
261
+ @client.delete_by_query(query, update_params(params))
274
262
  end
275
263
 
276
264
  # Returns information and statistics on terms in the fields of a
@@ -523,6 +523,16 @@ module Elastomer
523
523
  client.warmer(name, warmer_name)
524
524
  end
525
525
 
526
# Delete the documents in this index that match a query. The call is
# handed, arguments untouched, to the `docs` helper of this index.
#
# query  - The query body as a Hash (or, presumably, the URL parameters
#          when `params` is left out - verify against Docs#delete_by_query)
# params - Parameters Hash (optional)
#
# See Client#delete_by_query for more information.
#
# Returns a Hash of statistics about the delete operations
def delete_by_query(query, params = nil)
  docs.delete_by_query(query, params)
end
535
+
526
536
  # Internal: Add default parameters to the `params` Hash and then apply
527
537
  # `overrides` to the params if any are given.
528
538
  #
@@ -1,5 +1,5 @@
1
1
  module Elastomer
2
- VERSION = '0.6.0'
2
+ VERSION = '0.7.0'
3
3
 
4
4
  def self.version
5
5
  VERSION
@@ -293,4 +293,31 @@ describe Elastomer::Client::Bulk do
293
293
  assert_equal 'tweet', items[0]['index']['_type']
294
294
  assert_equal 1, items[0]['index']['_version']
295
295
  end
296
+
297
# Three index operations with :action_count => 2 are expected to produce
# two bulk responses: the first with two items, the second with one.
it 'streams bulk responses' do
  ops = (1..3).map do |n|
    [:index, { :message => "tweet #{n}" }, { :_id => n, :_type => 'book', :_index => @index.name }]
  end

  responses = $client.bulk_stream_responses(ops, { :action_count => 2 }).to_a
  first, second = responses

  assert_equal(2, responses.length)
  assert_bulk_index(first["items"][0])
  assert_bulk_index(first["items"][1])
  assert_bulk_index(second["items"][0])
end
309
+
310
# Every item of every bulk response reaches the block, and the returned
# stats Hash tallies them.
it 'streams bulk items' do
  ops = [
    [:index, { :message => 'tweet 1' }, { :_id => 1, :_type => 'book', :_index => @index.name }],
    [:index, { :message => 'tweet 2' }, { :_id => 2, :_type => 'book', :_index => @index.name }],
    [:index, { :message => 'tweet 3' }, { :_id => 3, :_type => 'book', :_index => @index.name }]
  ]
  items = []
  stats = $client.bulk_stream_items(ops, { :action_count => 2 }) { |item| items << item }

  assert_equal(3, items.length)
  items.each { |item| assert_bulk_index(item) }

  # The return value used to be assigned but never checked.
  assert_equal(3, stats['success'])
  assert_equal(0, stats['failure'])
  refute stats['errors']
end
296
323
  end
@@ -0,0 +1,151 @@
1
require File.expand_path('../../test_helper', __FILE__)

describe Elastomer::Client::DeleteByQuery do

  before do
    @index = $client.index "elastomer-delete-by-query-test"
    @index.delete if @index.exists?
    @docs = @index.docs("docs")
  end

  after do
    @index.delete if @index.exists?
  end

  describe "when an index with documents exists" do
    before do
      @index.create(nil)
      # Wait on the index created above; @index_name is never assigned in
      # this file, so the previous call passed nil.
      wait_for_index(@index.name)
    end

    # Expected '_indices' section when every counted document is in @index.
    def indices_stats(found, deleted, missing, failed)
      counts = { 'found' => found, 'deleted' => deleted, 'missing' => missing, 'failed' => failed }
      { '_all' => counts, @index.name => counts.dup }
    end

    # Answer every bulk request with one canned delete item for document 0.
    def stub_bulk_delete(fields)
      item = { "_index" => @index.name, "_type" => @docs.name, "_id" => 0 }.merge(fields)
      body = MultiJson.dump("took" => 0, "errors" => false, "items" => [{ "delete" => item }])
      stub_request(:post, /_bulk/).to_return(:body => body)
    end

    it 'deletes by query' do
      @docs.index({ :_id => 0, :name => "mittens" })
      @docs.index({ :_id => 1, :name => "luna" })

      @index.refresh
      response = $client.delete_by_query(nil, :q => "name:mittens")
      assert_equal(indices_stats(1, 1, 0, 0), response['_indices'])

      @index.refresh
      response = @docs.multi_get :ids => [0, 1]
      refute_found response['docs'][0]
      assert_found response['docs'][1]
    end

    it 'respects action_count' do
      @docs.index({ :_id => 0, :name => "mittens" })
      @docs.index({ :_id => 1, :name => "luna" })
      @index.refresh

      response = $client.delete_by_query(nil, :action_count => 1)

      # one bulk request per document
      assert_requested(:post, /_bulk/, :times => 2)

      assert_equal(indices_stats(2, 2, 0, 0), response['_indices'])

      @index.refresh
      response = @docs.multi_get :ids => [0, 1]
      refute_found response['docs'][0]
      refute_found response['docs'][1]
    end

    it 'counts missing documents' do
      @docs.index({ :_id => 0 })

      stub_bulk_delete("_version" => 1, "status" => 404, "found" => false)

      @index.refresh
      response = $client.delete_by_query(nil, :action_count => 1)
      assert_equal(indices_stats(0, 0, 1, 0), response['_indices'])
    end

    it 'counts failed operations' do
      @docs.index({ :_id => 0 })

      stub_bulk_delete("status" => 409, "error" => "VersionConflictEngineException")

      @index.refresh
      response = $client.delete_by_query(nil, :action_count => 1)
      assert_equal(indices_stats(1, 0, 0, 1), response['_indices'])
    end
  end
end
@@ -214,6 +214,20 @@ describe Elastomer::Client::Docs do
214
214
  assert_equal %w[pea53 grantr], authors
215
215
 
216
216
  h = @docs.delete_by_query(:q => "author:grantr")
217
+ assert_equal(h['_indices'], {
218
+ '_all' => {
219
+ 'found' => 1,
220
+ 'deleted' => 1,
221
+ 'missing' => 0,
222
+ 'failed' => 0,
223
+ },
224
+ @name => {
225
+ 'found' => 1,
226
+ 'deleted' => 1,
227
+ 'missing' => 0,
228
+ 'failed' => 0,
229
+ },
230
+ })
217
231
  @index.refresh
218
232
  h = @docs.multi_get :ids => [1, 2]
219
233
  assert_found h['docs'][0]
@@ -362,5 +362,25 @@ describe Elastomer::Client::Index do
362
362
  response = @index.segments
363
363
  assert_includes response["indices"], "elastomer-index-test"
364
364
  end
365
+
366
# Index one document, delete everything via the index-level helper, and
# check the per-index and "_all" counters in the returned statistics.
it 'deletes by query' do
  @index.docs('foo').index("foo" => "bar")
  @index.refresh

  r = @index.delete_by_query(:q => '*')

  counts = { 'found' => 1, 'deleted' => 1, 'missing' => 0, 'failed' => 0 }
  expected = { '_all' => counts, @name => counts }
  assert_equal(expected, r['_indices'])
end
365
385
  end
366
386
  end
@@ -1,3 +1,6 @@
1
+ require 'webmock/minitest'
2
+ WebMock.allow_net_connect!
3
+
1
4
  require 'securerandom'
2
5
  require 'rubygems' unless defined? Gem
3
6
  require 'bundler'
@@ -24,7 +27,7 @@ $client_params = {
24
27
  :port => ENV['BOXEN_ELASTICSEARCH_PORT'] || 9200,
25
28
  :read_timeout => 2,
26
29
  :open_timeout => 1,
27
- :opaque_id => true
30
+ :opaque_id => false
28
31
  }
29
32
  $client = Elastomer::Client.new $client_params
30
33
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elastomer-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Pease
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-09-11 00:00:00.000000000 Z
12
+ date: 2015-10-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: addressable
@@ -109,6 +109,20 @@ dependencies:
109
109
  - - "~>"
110
110
  - !ruby/object:Gem::Version
111
111
  version: '4.7'
112
+ - !ruby/object:Gem::Dependency
113
+ name: webmock
114
+ requirement: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - "~>"
117
+ - !ruby/object:Gem::Version
118
+ version: '1.21'
119
+ type: :development
120
+ prerelease: false
121
+ version_requirements: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - "~>"
124
+ - !ruby/object:Gem::Version
125
+ version: '1.21'
112
126
  - !ruby/object:Gem::Dependency
113
127
  name: rake
114
128
  requirement: !ruby/object:Gem::Requirement
@@ -156,6 +170,7 @@ files:
156
170
  - lib/elastomer/client.rb
157
171
  - lib/elastomer/client/bulk.rb
158
172
  - lib/elastomer/client/cluster.rb
173
+ - lib/elastomer/client/delete_by_query.rb
159
174
  - lib/elastomer/client/docs.rb
160
175
  - lib/elastomer/client/errors.rb
161
176
  - lib/elastomer/client/index.rb
@@ -179,6 +194,7 @@ files:
179
194
  - test/assertions.rb
180
195
  - test/client/bulk_test.rb
181
196
  - test/client/cluster_test.rb
197
+ - test/client/delete_by_query_test.rb
182
198
  - test/client/docs_test.rb
183
199
  - test/client/errors_test.rb
184
200
  - test/client/index_test.rb
@@ -225,6 +241,7 @@ test_files:
225
241
  - test/assertions.rb
226
242
  - test/client/bulk_test.rb
227
243
  - test/client/cluster_test.rb
244
+ - test/client/delete_by_query_test.rb
228
245
  - test/client/docs_test.rb
229
246
  - test/client/errors_test.rb
230
247
  - test/client/index_test.rb