scalastic 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +23 -0
- data/lib/scalastic/partition.rb +6 -0
- data/lib/scalastic/scroller.rb +32 -0
- data/lib/scalastic/version.rb +1 -1
- data/regression/regression_tests/scroll.rb +31 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bce6be5082989888ebb029cc6491c89e9ac426c8
|
4
|
+
data.tar.gz: db64fde0ec4e69129a08d16022820f1fd7a2c04f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c8bdbdd6c7819ccb1aac88061039fc433ba3b590d37ed0a3648659beb9f5073f908cf54f0f23bf5f41c076230433660ae713aa63faef84c8eaa1f68b95e9574
|
7
|
+
data.tar.gz: 25947a75533799e222ce26325e99dcfb9c8f4fe441e261f25ecd5cb15a0e4ab4741b32c4f61a8d58e5a98a5f17a1e8495decabb156802383e520a839dd7416c6
|
data/README.md
CHANGED
@@ -215,6 +215,29 @@ count = partition.search(search_type: 'count')['hits']['total']
|
|
215
215
|
raise 'Some documents were not removed' unless count == 0
|
216
216
|
```
|
217
217
|
|
218
|
+
### Scan/scroll
|
219
|
+
Partitions provide a `scroll` method for scrolling through the results of a query.
|
220
|
+
```ruby
|
221
|
+
# Connect to Elasticsearch
|
222
|
+
client = Elasticsearch::Client.new
|
223
|
+
client.indices.create index: 'scrolling'
|
224
|
+
partitions = client.partitions
|
225
|
+
partitions.prepare_index index: 'scrolling'
|
226
|
+
|
227
|
+
p = partitions.create id: 1, index: 'scrolling'
|
228
|
+
|
229
|
+
# Create some test data
|
230
|
+
10.times.each do |i|
|
231
|
+
p.index id: i + 1, type: 'test', body: {subject: "Test ##{i + 1}"}
|
232
|
+
end
|
233
|
+
|
234
|
+
# Get the hits. Size is set to 7 to test multiple calls to scroll
|
235
|
+
actual_hits = p.scroll(type: 'test', size: 7).to_a.sort{|h1, h2| h1['_id'].to_i <=> h2['_id'].to_i}
|
236
|
+
expected_hits = 10.times.map{|i| {'_id' => "#{i + 1}", '_type' => 'test', '_source' => {'_subject' => "Test ##{i + 1}"} }}
|
237
|
+
|
238
|
+
raise "Expected: #{expected_hits}, got: #{actual_hits}" unless expected_hits == actual_hits
|
239
|
+
```
|
240
|
+
|
218
241
|
### Deleting by query
|
219
242
|
Scalastic partitions support delete_by_query, but because it is no longer available in Elasticsearch core, we use our own implementation which uses scan/scroll searches and bulk operations for deletion.
|
220
243
|
```ruby
|
data/lib/scalastic/partition.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'scalastic/es_actions_generator'
|
2
2
|
require 'scalastic/partition_selector'
|
3
|
+
require 'scalastic/scroller'
|
3
4
|
|
4
5
|
module Scalastic
|
5
6
|
class Partition
|
@@ -123,6 +124,11 @@ module Scalastic
|
|
123
124
|
get_endpoints.index.nil?
|
124
125
|
end
|
125
126
|
|
127
|
+
# Builds a Scroller over this partition.
#
# The caller's arguments are copied and their :index entry is set to this
# partition's search endpoint (any :index supplied by the caller is replaced).
#
# @param args [Hash] search arguments handed on to the Scroller
# @return [Scroller] scroller built from es_client and the merged arguments
def scroll(args)
  partition_index = config.search_endpoint(id)
  Scroller.new(es_client, args.merge(index: partition_index))
end
|
131
|
+
|
126
132
|
private
|
127
133
|
|
128
134
|
def operation_name(entry)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Scalastic
  # Enumerable view over the hits of a scan/scroll search.
  #
  # Nothing is sent to Elasticsearch until the scroller is iterated; every
  # call to #each issues a fresh search followed by repeated scroll requests.
  class Scroller
    include Enumerable

    # Scroll keep-alive value sent with the search and with every scroll
    # request. Defaults to '1m'.
    attr_reader :scroll

    # @param es_client [#search, #scroll] client used to run the requests
    # @param args [Hash] search arguments; :search_type and :scroll are
    #   overridden when the search is issued
    def initialize(es_client, args)
      @es_client = es_client
      @args = args
      @scroll = '1m'
    end

    # Changes the scroll keep-alive value.
    #
    # @param value [String] new keep-alive value
    # @raise [ArgumentError] when value is nil or empty
    def scroll=(value)
      raise(ArgumentError, "scroll cannot be empty!") if value.nil? || value.empty?
      @scroll = value
    end

    # Yields every hit returned by the scroll, page by page.
    #
    # The scroll context is released once iteration finishes, whether it ran
    # to the end, the consumer stopped early, or a request raised.
    #
    # @yieldparam hit [Hash] a single entry of the response's hits.hits array
    # @return [Enumerator] when called without a block
    def each(&block)
      Enumerator.new do |enum|
        scroll_id = nil
        begin
          res = @es_client.search(@args.merge(search_type: 'scan', scroll: scroll))
          loop do
            # The initial search response is only used for its scroll id;
            # hits are always taken from the scroll responses.
            scroll_id = res['_scroll_id']
            res = @es_client.scroll(body: scroll_id, scroll: scroll)
            # Remember the most recent id so the cleanup targets it.
            scroll_id = res['_scroll_id'] || scroll_id
            hits = res['hits']['hits']
            break if hits.empty?
            hits.each { |hit| enum << hit }
          end
        ensure
          release_scroll(scroll_id)
        end
      end.each(&block)
    end

    private

    # Best-effort release of the server-side scroll context. Skipped when no
    # scroll id was obtained or the client does not offer clear_scroll.
    def release_scroll(scroll_id)
      return if scroll_id.nil? || !@es_client.respond_to?(:clear_scroll)
      @es_client.clear_scroll(scroll_id: scroll_id)
    rescue StandardError
      # Cleanup only: a failure here must not replace an error raised while
      # iterating, so it is deliberately ignored.
      nil
    end
  end
end
|
data/lib/scalastic/version.rb
CHANGED
@@ -0,0 +1,31 @@
|
|
1
|
+
module RegressionTests
  # Regression scenario for scrolling through a partition's documents.
  # Talks to an Elasticsearch instance through a default-configured client
  # and uses the 'scrolling' index.
  module Scroll
    extend self

    # Deletes the 'scrolling' index if it exists.
    def cleanup
      client = Elasticsearch::Client.new
      client.indices.delete index: 'scrolling' if client.indices.exists? index: 'scrolling'
    end

    # Indexes ten documents into a partition, scrolls through them and
    # raises when the hits differ from the expected ones.
    def run
      # Connect to Elasticsearch
      client = Elasticsearch::Client.new
      client.indices.create index: 'scrolling'
      partitions = client.partitions
      partitions.prepare_index index: 'scrolling'

      partition = partitions.create id: 1, index: 'scrolling'

      # Create some test data
      10.times do |i|
        partition.index id: i + 1, type: 'test', body: {subject: "Test ##{i + 1}"}
      end

      # Get the hits. Size is set to 7 to test multiple calls to scroll
      actual_hits = partition.scroll(type: 'test', size: 7).sort_by { |hit| hit['_id'].to_i }
      # NOTE(review): documents are indexed with a `subject` field, but the
      # expectation below uses '_subject' and lists only _id/_type/_source;
      # confirm this matches what scroll really returns.
      expected_hits = 10.times.map{|i| {'_id' => "#{i + 1}", '_type' => 'test', '_source' => {'_subject' => "Test ##{i + 1}"} }}

      raise "Expected: #{expected_hits}, got: #{actual_hits}" unless expected_hits == actual_hits
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scalastic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aliaksei Baturytski
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -102,6 +102,7 @@ files:
|
|
102
102
|
- lib/scalastic/partition.rb
|
103
103
|
- lib/scalastic/partition_selector.rb
|
104
104
|
- lib/scalastic/partitions_client.rb
|
105
|
+
- lib/scalastic/scroller.rb
|
105
106
|
- lib/scalastic/version.rb
|
106
107
|
- regression/regression.rake
|
107
108
|
- regression/regression_tests.rb
|
@@ -120,6 +121,7 @@ files:
|
|
120
121
|
- regression/regression_tests/nested_selector.rb
|
121
122
|
- regression/regression_tests/nested_selector_bulk.rb
|
122
123
|
- regression/regression_tests/partition_operations.rb
|
124
|
+
- regression/regression_tests/scroll.rb
|
123
125
|
- regression/regression_tests/string_selector_field.rb
|
124
126
|
- scalastic.gemspec
|
125
127
|
homepage: https://github.com/aliakb/scalastic
|