ej 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ej.gemspec +0 -1
- data/lib/ej/commands.rb +7 -5
- data/lib/ej/core.rb +21 -17
- data/lib/ej/util.rb +2 -2
- data/lib/ej/version.rb +1 -1
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee1f79efd243a83e80d9b4ace0f58213ceee0c76
|
4
|
+
data.tar.gz: bc86ffef97ad3a387c332a735437c661f6825970
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a1074107c6d568c830ee1153d5adf0b294bcaf7c408ee45117cd4a05b0a09504aa4d362b9f88d6b4bab4e916eb8d29687bbd327681ce8e258b94002cf144ee9
|
7
|
+
data.tar.gz: 5b627b9a60eb8cc01194795bb63aa99a9e28c03151b3964288a1bf18f763ae8f418f18706dbb8c95815e33598ca2b75bcd9731058d2a81bf2749fb29f814a2a6
|
data/ej.gemspec
CHANGED
@@ -24,7 +24,6 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency "pry-doc"
|
25
25
|
spec.add_development_dependency 'rspec', '~> 2.4'
|
26
26
|
spec.add_runtime_dependency "thor"
|
27
|
-
spec.add_runtime_dependency "yajl-ruby"
|
28
27
|
spec.add_runtime_dependency "elasticsearch"
|
29
28
|
spec.add_runtime_dependency "hashie"
|
30
29
|
spec.add_runtime_dependency "parallel"
|
data/lib/ej/commands.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'ej/core'
|
2
2
|
require 'thor'
|
3
|
-
require '
|
3
|
+
require 'json'
|
4
4
|
|
5
5
|
module Ej
|
6
6
|
class Commands < Thor
|
@@ -71,6 +71,7 @@ module Ej
|
|
71
71
|
option :per, type: :numeric, default: nil, desc: 'per'
|
72
72
|
option :scroll, type: :string, default: "1m", desc: 'scroll'
|
73
73
|
option :dest_index, type: :string, desc: 'dest index'
|
74
|
+
option :slice_max, type: :numeric, desc: 'slice max'
|
74
75
|
def copy
|
75
76
|
@core.copy(
|
76
77
|
options[:source],
|
@@ -78,7 +79,8 @@ module Ej
|
|
78
79
|
options[:query],
|
79
80
|
options[:per],
|
80
81
|
options[:scroll],
|
81
|
-
options[:dest_index]
|
82
|
+
options[:dest_index],
|
83
|
+
options[:slice_max]
|
82
84
|
)
|
83
85
|
end
|
84
86
|
|
@@ -171,7 +173,7 @@ module Ej
|
|
171
173
|
desc 'not_analyzed', 'not analyzed'
|
172
174
|
def not_analyzed
|
173
175
|
json = File.read(File.expand_path('../../../template/not_analyze_template.json', __FILE__))
|
174
|
-
hash =
|
176
|
+
hash = JSON.parse(json)
|
175
177
|
puts_with_format(@indices.put_template('ej_init', hash))
|
176
178
|
end
|
177
179
|
|
@@ -186,7 +188,7 @@ module Ej
|
|
186
188
|
|
187
189
|
desc 'put_template', 'put template'
|
188
190
|
def put_template(name)
|
189
|
-
hash =
|
191
|
+
hash = JSON.parse(STDIN.read)
|
190
192
|
puts_with_format(@indices.put_template(name, hash))
|
191
193
|
end
|
192
194
|
|
@@ -255,7 +257,7 @@ module Ej
|
|
255
257
|
private
|
256
258
|
|
257
259
|
def puts_with_format(object)
|
258
|
-
puts
|
260
|
+
puts JSON.pretty_generate(object)
|
259
261
|
end
|
260
262
|
|
261
263
|
end
|
data/lib/ej/core.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'json'
|
2
2
|
require 'elasticsearch'
|
3
3
|
require 'hashie'
|
4
4
|
require 'parallel'
|
@@ -54,8 +54,8 @@ module Ej
|
|
54
54
|
docs.each do |doc|
|
55
55
|
source = doc.delete('_source')
|
56
56
|
doc.delete('_score')
|
57
|
-
bulk_message <<
|
58
|
-
bulk_message <<
|
57
|
+
bulk_message << JSON.dump({ 'index' => doc.to_h })
|
58
|
+
bulk_message << JSON.dump(source)
|
59
59
|
end
|
60
60
|
num += 1
|
61
61
|
puts bulk_message.join("\n")
|
@@ -147,37 +147,41 @@ module Ej
|
|
147
147
|
connect_with_retry { @client.bulk body: bulk_message unless bulk_message.empty? }
|
148
148
|
end
|
149
149
|
|
150
|
-
def copy(source, dest, query, per_size, scroll, dest_index)
|
150
|
+
def copy(source, dest, query, per_size, scroll, dest_index, slice_max)
|
151
151
|
source_client = Elasticsearch::Client.new hosts: source
|
152
152
|
dest_client = Elasticsearch::Client.new hosts: dest
|
153
153
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
154
|
+
parallel_array = slice_max ? slice_max.times.to_a : [0]
|
155
|
+
Parallel.map(parallel_array, :in_processes=>parallel_array.size) do |slice_id|
|
156
|
+
scroll_option = get_scroll_option(@index, query, per_size, scroll, slice_id, slice_max)
|
157
|
+
r = connect_with_retry { source_client.search(scroll_option) }
|
158
|
+
total = r['hits']['total']
|
159
|
+
i = 0
|
160
|
+
i += bulk_results(r, dest_client, i, total, dest_index, slice_id)
|
161
|
+
|
162
|
+
while r = connect_with_retry { source_client.scroll(scroll_id: r['_scroll_id'], scroll: scroll) } and
|
163
|
+
(not r['hits']['hits'].empty?) do
|
164
|
+
i += bulk_results(r, dest_client, i, total, dest_index, slice_id)
|
165
|
+
end
|
163
166
|
end
|
164
167
|
end
|
165
168
|
|
166
169
|
private
|
167
170
|
|
168
|
-
def bulk_results(results, dest_client, before_size, total, dest_index)
|
171
|
+
def bulk_results(results, dest_client, before_size, total, dest_index, slice_id)
|
169
172
|
bulk_message = convert_results(results, dest_index)
|
170
173
|
connect_with_retry do
|
171
174
|
dest_client.bulk body: bulk_message unless bulk_message.empty?
|
172
175
|
to_size = before_size + (bulk_message.size/2)
|
173
|
-
@logger.info "copy complete (#{before_size}-#{to_size})/#{total}"
|
176
|
+
@logger.info "slice_id[#{slice_id}] copy complete (#{before_size}-#{to_size})/#{total}"
|
174
177
|
end
|
175
178
|
return (bulk_message.size/2)
|
176
179
|
end
|
177
180
|
|
178
|
-
def get_scroll_option(index, query, size, scroll)
|
181
|
+
def get_scroll_option(index, query, size, scroll, slice_id, slice_max)
|
179
182
|
body = {}
|
180
183
|
body[:query] = { query_string: { query: query } } unless query.nil?
|
184
|
+
body[:slice] = { id: slice_id, max: slice_max } if slice_max
|
181
185
|
search_option = { index: index, scroll: scroll, body: body, size: (size || DEFAULT_PER) }
|
182
186
|
search_option
|
183
187
|
end
|
@@ -207,7 +211,7 @@ module Ej
|
|
207
211
|
if retries < retry_on_failure
|
208
212
|
retries += 1
|
209
213
|
@logger.warn "Could not connect to Elasticsearch, resetting connection and trying again. #{e.message}"
|
210
|
-
sleep
|
214
|
+
sleep 10**retries
|
211
215
|
retry
|
212
216
|
end
|
213
217
|
raise "Could not connect to Elasticsearch after #{retries} retries. #{e.message}"
|
data/lib/ej/util.rb
CHANGED
@@ -2,11 +2,11 @@ module Ej
|
|
2
2
|
class Util
|
3
3
|
def self.parse_json(buffer)
|
4
4
|
begin
|
5
|
-
data =
|
5
|
+
data = JSON.parse(buffer)
|
6
6
|
rescue => e
|
7
7
|
data = []
|
8
8
|
buffer.lines.each do |line|
|
9
|
-
data <<
|
9
|
+
data << JSON.parse(line)
|
10
10
|
end
|
11
11
|
end
|
12
12
|
data.class == Array ? data : [data]
|
data/lib/ej/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ej
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.11
|
4
|
+
version: 0.1.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-10-
|
11
|
+
date: 2017-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -94,20 +94,6 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: yajl-ruby
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
104
|
-
type: :runtime
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - ">="
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
111
97
|
- !ruby/object:Gem::Dependency
|
112
98
|
name: elasticsearch
|
113
99
|
requirement: !ruby/object:Gem::Requirement
|