scalastic 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 658c178987891b8d35d5e53453957d166be292e8
4
- data.tar.gz: c01b6636034a41ea44c2c02216d53d6c7fb801e7
3
+ metadata.gz: bce6be5082989888ebb029cc6491c89e9ac426c8
4
+ data.tar.gz: db64fde0ec4e69129a08d16022820f1fd7a2c04f
5
5
  SHA512:
6
- metadata.gz: 85c8a15a5b37793884bd143493ceea7d75bfebdb9afa16f4aa67c19b8d500d8a53863190c7050bd20907b42b5ff8cd994718c765d40f00c3fe2b4c732f2f6440
7
- data.tar.gz: 3d87aebccf6648854df7f4ac582fe183432cba07723847665313bc8456180ac87bdae5a3ae06aabec9f6b5cd404b201e7e210cb0a6643104110cecd96e5b817d
6
+ metadata.gz: 4c8bdbdd6c7819ccb1aac88061039fc433ba3b590d37ed0a3648659beb9f5073f908cf54f0f23bf5f41c076230433660ae713aa63faef84c8eaa1f68b95e9574
7
+ data.tar.gz: 25947a75533799e222ce26325e99dcfb9c8f4fe441e261f25ecd5cb15a0e4ab4741b32c4f61a8d58e5a98a5f17a1e8495decabb156802383e520a839dd7416c6
data/README.md CHANGED
@@ -215,6 +215,29 @@ count = partition.search(search_type: 'count')['hits']['total']
215
215
  raise 'Some documents were not removed' unless count == 0
216
216
  ```
217
217
 
218
+ ### Scan/scroll
219
+ Partitions provide a `scroll` method for scrolling through the results of a query.
220
+ ```ruby
221
+ # Connect to Elasticsearch
222
+ client = Elasticsearch::Client.new
223
+ client.indices.create index: 'scrolling'
224
+ partitions = client.partitions
225
+ partitions.prepare_index index: 'scrolling'
226
+
227
+ p = partitions.create id: 1, index: 'scrolling'
228
+
229
+ # Create some test data
230
+ 10.times.each do |i|
231
+ p.index id: i + 1, type: 'test', body: {subject: "Test ##{i + 1}"}
232
+ end
233
+
234
+ # Get the hits. Size is set to 7 to test multiple calls to scroll
235
+ actual_hits = p.scroll(type: 'test', size: 7).to_a.sort{|h1, h2| h1['_id'].to_i <=> h2['_id'].to_i}
236
+ expected_hits = 10.times.map{|i| {'_id' => "#{i + 1}", '_type' => 'test', '_source' => {'_subject' => "Test ##{i + 1}"} }}
237
+
238
+ raise "Expected: #{expected_hits}, got: #{actual_hits}" unless expected_hits == actual_hits
239
+ ```
240
+
218
241
  ### Deleting by query
219
242
  Scalastic partitions support delete_by_query, but because it is no longer available in Elasticsearch core, we use our own implementation which uses scan/scroll searches and bulk operations for deletion.
220
243
  ```ruby
@@ -1,5 +1,6 @@
1
1
  require 'scalastic/es_actions_generator'
2
2
  require 'scalastic/partition_selector'
3
+ require 'scalastic/scroller'
3
4
 
4
5
  module Scalastic
5
6
  class Partition
@@ -123,6 +124,11 @@ module Scalastic
123
124
  get_endpoints.index.nil?
124
125
  end
125
126
 
127
+ def scroll(args)
128
+ args = args.merge(index: config.search_endpoint(id))
129
+ Scroller.new(es_client, args)
130
+ end
131
+
126
132
  private
127
133
 
128
134
  def operation_name(entry)
@@ -0,0 +1,32 @@
1
+ module Scalastic
2
+ class Scroller
3
+ include Enumerable
4
+
5
+ def initialize(es_client, args)
6
+ @es_client = es_client
7
+ @args = args
8
+ @scroll = '1m'
9
+ end
10
+
11
+ def scroll=(value)
12
+ raise(ArgumentError, "scroll cannot be empty!") if value.nil? || value.empty?
13
+ @scroll = value
14
+ end
15
+
16
+ attr_reader(:scroll)
17
+
18
+ def each(&block)
19
+ Enumerator.new do |enum|
20
+ args = @args.merge(search_type: 'scan', scroll: scroll)
21
+ res = @es_client.search(args)
22
+ loop do
23
+ scroll_id = res['_scroll_id']
24
+ res = @es_client.scroll(body: scroll_id, scroll: scroll)
25
+ hits = res['hits']['hits']
26
+ break unless hits.any?
27
+ hits.each{|h| enum << h}
28
+ end
29
+ end.each(&block)
30
+ end
31
+ end
32
+ end
@@ -1,3 +1,3 @@
1
1
  module Scalastic
2
- VERSION = "0.7.0"
2
+ VERSION = "0.8.0"
3
3
  end
@@ -0,0 +1,31 @@
1
+ module RegressionTests
2
+ module Scroll
3
+ extend self
4
+
5
+ def cleanup
6
+ client = Elasticsearch::Client.new
7
+ client.indices.delete index: 'scrolling' if client.indices.exists? index: 'scrolling'
8
+ end
9
+
10
+ def run
11
+ # Connect to Elasticsearch
12
+ client = Elasticsearch::Client.new
13
+ client.indices.create index: 'scrolling'
14
+ partitions = client.partitions
15
+ partitions.prepare_index index: 'scrolling'
16
+
17
+ p = partitions.create id: 1, index: 'scrolling'
18
+
19
+ # Create some test data
20
+ 10.times.each do |i|
21
+ p.index id: i + 1, type: 'test', body: {subject: "Test ##{i + 1}"}
22
+ end
23
+
24
+ # Get the hits. Size is set to 7 to test multiple calls to scroll
25
+ actual_hits = p.scroll(type: 'test', size: 7).to_a.sort{|h1, h2| h1['_id'].to_i <=> h2['_id'].to_i}
26
+ expected_hits = 10.times.map{|i| {'_id' => "#{i + 1}", '_type' => 'test', '_source' => {'_subject' => "Test ##{i + 1}"} }}
27
+
28
+ raise "Expected: #{expected_hits}, got: #{actual_hits}" unless expected_hits == actual_hits
29
+ end
30
+ end
31
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scalastic
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aliaksei Baturytski
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-06-21 00:00:00.000000000 Z
11
+ date: 2016-07-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -102,6 +102,7 @@ files:
102
102
  - lib/scalastic/partition.rb
103
103
  - lib/scalastic/partition_selector.rb
104
104
  - lib/scalastic/partitions_client.rb
105
+ - lib/scalastic/scroller.rb
105
106
  - lib/scalastic/version.rb
106
107
  - regression/regression.rake
107
108
  - regression/regression_tests.rb
@@ -120,6 +121,7 @@ files:
120
121
  - regression/regression_tests/nested_selector.rb
121
122
  - regression/regression_tests/nested_selector_bulk.rb
122
123
  - regression/regression_tests/partition_operations.rb
124
+ - regression/regression_tests/scroll.rb
123
125
  - regression/regression_tests/string_selector_field.rb
124
126
  - scalastic.gemspec
125
127
  homepage: https://github.com/aliakb/scalastic