scalastic 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 658c178987891b8d35d5e53453957d166be292e8
4
- data.tar.gz: c01b6636034a41ea44c2c02216d53d6c7fb801e7
3
+ metadata.gz: bce6be5082989888ebb029cc6491c89e9ac426c8
4
+ data.tar.gz: db64fde0ec4e69129a08d16022820f1fd7a2c04f
5
5
  SHA512:
6
- metadata.gz: 85c8a15a5b37793884bd143493ceea7d75bfebdb9afa16f4aa67c19b8d500d8a53863190c7050bd20907b42b5ff8cd994718c765d40f00c3fe2b4c732f2f6440
7
- data.tar.gz: 3d87aebccf6648854df7f4ac582fe183432cba07723847665313bc8456180ac87bdae5a3ae06aabec9f6b5cd404b201e7e210cb0a6643104110cecd96e5b817d
6
+ metadata.gz: 4c8bdbdd6c7819ccb1aac88061039fc433ba3b590d37ed0a3648659beb9f5073f908cf54f0f23bf5f41c076230433660ae713aa63faef84c8eaa1f68b95e9574
7
+ data.tar.gz: 25947a75533799e222ce26325e99dcfb9c8f4fe441e261f25ecd5cb15a0e4ab4741b32c4f61a8d58e5a98a5f17a1e8495decabb156802383e520a839dd7416c6
data/README.md CHANGED
@@ -215,6 +215,29 @@ count = partition.search(search_type: 'count')['hits']['total']
215
215
  raise 'Some documents were not removed' unless count == 0
216
216
  ```
217
217
 
218
+ ### Scan/scroll
219
+ Partitions provide a `scroll` method for scrolling through the results of a query.
220
+ ```ruby
221
+ # Connect to Elasticsearch
222
+ client = Elasticsearch::Client.new
223
+ client.indices.create index: 'scrolling'
224
+ partitions = client.partitions
225
+ partitions.prepare_index index: 'scrolling'
226
+
227
+ p = partitions.create id: 1, index: 'scrolling'
228
+
229
+ # Create some test data
230
+ 10.times.each do |i|
231
+ p.index id: i + 1, type: 'test', body: {subject: "Test ##{i + 1}"}
232
+ end
233
+
234
+ # Get the hits. Size is set to 7 to test multiple calls to scroll
235
+ actual_hits = p.scroll(type: 'test', size: 7).to_a.sort{|h1, h2| h1['_id'].to_i <=> h2['_id'].to_i}
236
+ expected_hits = 10.times.map{|i| {'_id' => "#{i + 1}", '_type' => 'test', '_source' => {'_subject' => "Test ##{i + 1}"} }}
237
+
238
+ raise "Expected: #{expected_hits}, got: #{actual_hits}" unless expected_hits == actual_hits
239
+ ```
240
+
218
241
  ### Deleting by query
219
242
  Scalastic partitions support delete_by_query, but because it is no longer available in Elasticsearch core, we use our own implementation which uses scan/scroll searches and bulk operations for deletion.
220
243
  ```ruby
data/lib/scalastic/partition.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'scalastic/es_actions_generator'
2
2
  require 'scalastic/partition_selector'
3
+ require 'scalastic/scroller'
3
4
 
4
5
  module Scalastic
5
6
  class Partition
@@ -123,6 +124,11 @@ module Scalastic
123
124
  get_endpoints.index.nil?
124
125
  end
125
126
 
127
+ def scroll(args)
128
+ args = args.merge(index: config.search_endpoint(id))
129
+ Scroller.new(es_client, args)
130
+ end
131
+
126
132
  private
127
133
 
128
134
  def operation_name(entry)
data/lib/scalastic/scroller.rb ADDED
@@ -0,0 +1,32 @@
1
+ module Scalastic
2
+ class Scroller
3
+ include Enumerable
4
+
5
+ def initialize(es_client, args)
6
+ @es_client = es_client
7
+ @args = args
8
+ @scroll = '1m'
9
+ end
10
+
11
+ def scroll=(value)
12
+ raise(ArgumentError, "scroll cannot be empty!") if value.nil? || value.empty?
13
+ @scroll = value
14
+ end
15
+
16
+ attr_reader(:scroll)
17
+
18
+ def each(&block)
19
+ Enumerator.new do |enum|
20
+ args = @args.merge(search_type: 'scan', scroll: scroll)
21
+ res = @es_client.search(args)
22
+ loop do
23
+ scroll_id = res['_scroll_id']
24
+ res = @es_client.scroll(body: scroll_id, scroll: scroll)
25
+ hits = res['hits']['hits']
26
+ break unless hits.any?
27
+ hits.each{|h| enum << h}
28
+ end
29
+ end.each(&block)
30
+ end
31
+ end
32
+ end
data/lib/scalastic/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Scalastic
2
- VERSION = "0.7.0"
2
+ VERSION = "0.8.0"
3
3
  end
data/regression/regression_tests/scroll.rb ADDED
@@ -0,0 +1,31 @@
1
+ module RegressionTests
2
+ module Scroll
3
+ extend self
4
+
5
+ def cleanup
6
+ client = Elasticsearch::Client.new
7
+ client.indices.delete index: 'scrolling' if client.indices.exists? index: 'scrolling'
8
+ end
9
+
10
+ def run
11
+ # Connect to Elasticsearch
12
+ client = Elasticsearch::Client.new
13
+ client.indices.create index: 'scrolling'
14
+ partitions = client.partitions
15
+ partitions.prepare_index index: 'scrolling'
16
+
17
+ p = partitions.create id: 1, index: 'scrolling'
18
+
19
+ # Create some test data
20
+ 10.times.each do |i|
21
+ p.index id: i + 1, type: 'test', body: {subject: "Test ##{i + 1}"}
22
+ end
23
+
24
+ # Get the hits. Size is set to 7 to test multiple calls to scroll
25
+ actual_hits = p.scroll(type: 'test', size: 7).to_a.sort{|h1, h2| h1['_id'].to_i <=> h2['_id'].to_i}
26
+ expected_hits = 10.times.map{|i| {'_id' => "#{i + 1}", '_type' => 'test', '_source' => {'_subject' => "Test ##{i + 1}"} }}
27
+
28
+ raise "Expected: #{expected_hits}, got: #{actual_hits}" unless expected_hits == actual_hits
29
+ end
30
+ end
31
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scalastic
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aliaksei Baturytski
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-06-21 00:00:00.000000000 Z
11
+ date: 2016-07-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -102,6 +102,7 @@ files:
102
102
  - lib/scalastic/partition.rb
103
103
  - lib/scalastic/partition_selector.rb
104
104
  - lib/scalastic/partitions_client.rb
105
+ - lib/scalastic/scroller.rb
105
106
  - lib/scalastic/version.rb
106
107
  - regression/regression.rake
107
108
  - regression/regression_tests.rb
@@ -120,6 +121,7 @@ files:
120
121
  - regression/regression_tests/nested_selector.rb
121
122
  - regression/regression_tests/nested_selector_bulk.rb
122
123
  - regression/regression_tests/partition_operations.rb
124
+ - regression/regression_tests/scroll.rb
123
125
  - regression/regression_tests/string_selector_field.rb
124
126
  - scalastic.gemspec
125
127
  homepage: https://github.com/aliakb/scalastic