scalastic 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -0
- data/lib/scalastic/partition.rb +6 -0
- data/lib/scalastic/scroller.rb +32 -0
- data/lib/scalastic/version.rb +1 -1
- data/regression/regression_tests/scroll.rb +31 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bce6be5082989888ebb029cc6491c89e9ac426c8
|
4
|
+
data.tar.gz: db64fde0ec4e69129a08d16022820f1fd7a2c04f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c8bdbdd6c7819ccb1aac88061039fc433ba3b590d37ed0a3648659beb9f5073f908cf54f0f23bf5f41c076230433660ae713aa63faef84c8eaa1f68b95e9574
|
7
|
+
data.tar.gz: 25947a75533799e222ce26325e99dcfb9c8f4fe441e261f25ecd5cb15a0e4ab4741b32c4f61a8d58e5a98a5f17a1e8495decabb156802383e520a839dd7416c6
|
data/README.md
CHANGED
@@ -215,6 +215,29 @@ count = partition.search(search_type: 'count')['hits']['total']
|
|
215
215
|
raise 'Some documents were not removed' unless count == 0
|
216
216
|
```
|
217
217
|
|
218
|
+
### Scan/scroll
|
219
|
+
Partitions provide a `scroll` method for scrolling through the results of a query.
|
220
|
+
```ruby
|
221
|
+
# Connect to Elasticsearch
|
222
|
+
client = Elasticsearch::Client.new
|
223
|
+
client.indices.create index: 'scrolling'
|
224
|
+
partitions = client.partitions
|
225
|
+
partitions.prepare_index index: 'scrolling'
|
226
|
+
|
227
|
+
p = partitions.create id: 1, index: 'scrolling'
|
228
|
+
|
229
|
+
# Create some test data
|
230
|
+
10.times.each do |i|
|
231
|
+
p.index id: i + 1, type: 'test', body: {subject: "Test ##{i + 1}"}
|
232
|
+
end
|
233
|
+
|
234
|
+
# Get the hits. Size is set to 7 to test multiple calls to scroll
|
235
|
+
actual_hits = p.scroll(type: 'test', size: 7).to_a.sort{|h1, h2| h1['_id'].to_i <=> h2['_id'].to_i}
|
236
|
+
expected_hits = 10.times.map{|i| {'_id' => "#{i + 1}", '_type' => 'test', '_source' => {'_subject' => "Test ##{i + 1}"} }}
|
237
|
+
|
238
|
+
raise "Expected: #{expected_hits}, got: #{actual_hits}" unless expected_hits == actual_hits
|
239
|
+
```
|
240
|
+
|
218
241
|
### Deleting by query
|
219
242
|
Scalastic partitions support delete_by_query, but because it is no longer available in Elasticsearch core, we use our own implementation which uses scan/scroll searches and bulk operations for deletion.
|
220
243
|
```ruby
|
data/lib/scalastic/partition.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'scalastic/es_actions_generator'
|
2
2
|
require 'scalastic/partition_selector'
|
3
|
+
require 'scalastic/scroller'
|
3
4
|
|
4
5
|
module Scalastic
|
5
6
|
class Partition
|
@@ -123,6 +124,11 @@ module Scalastic
|
|
123
124
|
get_endpoints.index.nil?
|
124
125
|
end
|
125
126
|
|
127
|
+
# Builds a Scroller that iterates over search hits within this partition.
#
# The given arguments are passed on to the Scroller with :index set to this
# partition's search endpoint; an :index supplied by the caller is
# overridden. The caller's hash is not modified.
#
# @param args [Hash] search arguments (e.g. type:, size:, body:); optional
# @return [Scalastic::Scroller] enumerable over the matching hits
def scroll(args = {})
  scoped_args = args.merge(index: config.search_endpoint(id))
  Scroller.new(es_client, scoped_args)
end
|
131
|
+
|
126
132
|
private
|
127
133
|
|
128
134
|
def operation_name(entry)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Scalastic
  # Enumerates the hits of an Elasticsearch scan/scroll search.
  #
  # Nothing is sent to the client until the scroller is iterated, and every
  # iteration starts a new search. Enumerable is mixed in, so #to_a, #map,
  # #first etc. are available.
  class Scroller
    include Enumerable

    # Value sent as the :scroll option of every request (in Elasticsearch
    # this is how long the scroll context is kept alive). Defaults to '1m'.
    attr_reader :scroll

    # @param es_client [#search, #scroll] client used to issue the requests
    # @param args [Hash] arguments for the initial search; :search_type and
    #   :scroll are overridden when the search is started
    def initialize(es_client, args)
      @es_client = es_client
      @args = args
      @scroll = '1m'
    end

    # Changes the :scroll option used for subsequent iterations.
    #
    # @param value [String] new scroll value, e.g. '5m'
    # @raise [ArgumentError] if value is nil or empty
    def scroll=(value)
      raise ArgumentError, 'scroll cannot be empty!' if value.nil? || value.empty?

      @scroll = value
    end

    # Runs the search and yields every hit, page by page.
    #
    # @yieldparam hit [Hash] a single entry of the response's hits.hits array
    # @return [Enumerator] when called without a block
    # @return [self] when called with a block
    def each
      return enum_for(:each) unless block_given?

      # The initial 'scan' response is only used for its scroll id; its hits
      # are not read.
      response = @es_client.search(@args.merge(search_type: 'scan', scroll: scroll))
      loop do
        # Always continue from the scroll id of the most recent response.
        response = @es_client.scroll(body: response['_scroll_id'], scroll: scroll)
        hits = response['hits']['hits']
        break unless hits.any?

        hits.each { |hit| yield hit }
      end
      self
    end
  end
end
|
data/lib/scalastic/version.rb
CHANGED
@@ -0,0 +1,31 @@
|
|
1
|
+
module RegressionTests
  # Regression test for Partition#scroll: indexes ten documents into a
  # partition and checks that scrolling returns all of them.
  module Scroll
    extend self

    # Removes the 'scrolling' index if a previous run left it behind.
    def cleanup
      client = Elasticsearch::Client.new
      client.indices.delete(index: 'scrolling') if client.indices.exists?(index: 'scrolling')
    end

    # Runs the scenario.
    #
    # @raise [RuntimeError] if the scrolled hits differ from the expected ones
    def run
      # Connect to Elasticsearch
      client = Elasticsearch::Client.new
      client.indices.create index: 'scrolling'
      partitions = client.partitions
      partitions.prepare_index index: 'scrolling'

      partition = partitions.create id: 1, index: 'scrolling'

      # Create some test data
      10.times do |i|
        partition.index id: i + 1, type: 'test', body: {subject: "Test ##{i + 1}"}
      end

      # Get the hits. Size is set to 7 to test multiple calls to scroll
      actual_hits = partition.scroll(type: 'test', size: 7).to_a.sort { |h1, h2| h1['_id'].to_i <=> h2['_id'].to_i }
      # NOTE(review): documents are indexed with a `subject` field but the
      # expectation uses '_subject', and real hits may carry more keys than
      # listed here -- confirm against a live Elasticsearch run.
      expected_hits = 10.times.map do |i|
        {'_id' => "#{i + 1}", '_type' => 'test', '_source' => {'_subject' => "Test ##{i + 1}"}}
      end

      raise "Expected: #{expected_hits}, got: #{actual_hits}" unless expected_hits == actual_hits
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scalastic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aliaksei Baturytski
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -102,6 +102,7 @@ files:
|
|
102
102
|
- lib/scalastic/partition.rb
|
103
103
|
- lib/scalastic/partition_selector.rb
|
104
104
|
- lib/scalastic/partitions_client.rb
|
105
|
+
- lib/scalastic/scroller.rb
|
105
106
|
- lib/scalastic/version.rb
|
106
107
|
- regression/regression.rake
|
107
108
|
- regression/regression_tests.rb
|
@@ -120,6 +121,7 @@ files:
|
|
120
121
|
- regression/regression_tests/nested_selector.rb
|
121
122
|
- regression/regression_tests/nested_selector_bulk.rb
|
122
123
|
- regression/regression_tests/partition_operations.rb
|
124
|
+
- regression/regression_tests/scroll.rb
|
123
125
|
- regression/regression_tests/string_selector_field.rb
|
124
126
|
- scalastic.gemspec
|
125
127
|
homepage: https://github.com/aliakb/scalastic
|