ej 0.1.11 → 0.1.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ej.gemspec +0 -1
- data/lib/ej/commands.rb +7 -5
- data/lib/ej/core.rb +21 -17
- data/lib/ej/util.rb +2 -2
- data/lib/ej/version.rb +1 -1
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee1f79efd243a83e80d9b4ace0f58213ceee0c76
|
4
|
+
data.tar.gz: bc86ffef97ad3a387c332a735437c661f6825970
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a1074107c6d568c830ee1153d5adf0b294bcaf7c408ee45117cd4a05b0a09504aa4d362b9f88d6b4bab4e916eb8d29687bbd327681ce8e258b94002cf144ee9
|
7
|
+
data.tar.gz: 5b627b9a60eb8cc01194795bb63aa99a9e28c03151b3964288a1bf18f763ae8f418f18706dbb8c95815e33598ca2b75bcd9731058d2a81bf2749fb29f814a2a6
|
data/ej.gemspec
CHANGED
@@ -24,7 +24,6 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency "pry-doc"
|
25
25
|
spec.add_development_dependency 'rspec', '~> 2.4'
|
26
26
|
spec.add_runtime_dependency "thor"
|
27
|
-
spec.add_runtime_dependency "yajl-ruby"
|
28
27
|
spec.add_runtime_dependency "elasticsearch"
|
29
28
|
spec.add_runtime_dependency "hashie"
|
30
29
|
spec.add_runtime_dependency "parallel"
|
data/lib/ej/commands.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'ej/core'
|
2
2
|
require 'thor'
|
3
|
-
require '
|
3
|
+
require 'json'
|
4
4
|
|
5
5
|
module Ej
|
6
6
|
class Commands < Thor
|
@@ -71,6 +71,7 @@ module Ej
|
|
71
71
|
option :per, type: :numeric, default: nil, desc: 'per'
|
72
72
|
option :scroll, type: :string, default: "1m", desc: 'scroll'
|
73
73
|
option :dest_index, type: :string, desc: 'dest index'
|
74
|
+
option :slice_max, type: :numeric, desc: 'slice max'
|
74
75
|
def copy
|
75
76
|
@core.copy(
|
76
77
|
options[:source],
|
@@ -78,7 +79,8 @@ module Ej
|
|
78
79
|
options[:query],
|
79
80
|
options[:per],
|
80
81
|
options[:scroll],
|
81
|
-
options[:dest_index]
|
82
|
+
options[:dest_index],
|
83
|
+
options[:slice_max]
|
82
84
|
)
|
83
85
|
end
|
84
86
|
|
@@ -171,7 +173,7 @@ module Ej
|
|
171
173
|
desc 'not_analyzed', 'not analyzed'
|
172
174
|
def not_analyzed
|
173
175
|
json = File.read(File.expand_path('../../../template/not_analyze_template.json', __FILE__))
|
174
|
-
hash =
|
176
|
+
hash = JSON.parse(json)
|
175
177
|
puts_with_format(@indices.put_template('ej_init', hash))
|
176
178
|
end
|
177
179
|
|
@@ -186,7 +188,7 @@ module Ej
|
|
186
188
|
|
187
189
|
desc 'put_template', 'put template'
|
188
190
|
def put_template(name)
|
189
|
-
hash =
|
191
|
+
hash = JSON.parse(STDIN.read)
|
190
192
|
puts_with_format(@indices.put_template(name, hash))
|
191
193
|
end
|
192
194
|
|
@@ -255,7 +257,7 @@ module Ej
|
|
255
257
|
private
|
256
258
|
|
257
259
|
def puts_with_format(object)
|
258
|
-
puts
|
260
|
+
puts JSON.pretty_generate(object)
|
259
261
|
end
|
260
262
|
|
261
263
|
end
|
data/lib/ej/core.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'json'
|
2
2
|
require 'elasticsearch'
|
3
3
|
require 'hashie'
|
4
4
|
require 'parallel'
|
@@ -54,8 +54,8 @@ module Ej
|
|
54
54
|
docs.each do |doc|
|
55
55
|
source = doc.delete('_source')
|
56
56
|
doc.delete('_score')
|
57
|
-
bulk_message <<
|
58
|
-
bulk_message <<
|
57
|
+
bulk_message << JSON.dump({ 'index' => doc.to_h })
|
58
|
+
bulk_message << JSON.dump(source)
|
59
59
|
end
|
60
60
|
num += 1
|
61
61
|
puts bulk_message.join("\n")
|
@@ -147,37 +147,41 @@ module Ej
|
|
147
147
|
connect_with_retry { @client.bulk body: bulk_message unless bulk_message.empty? }
|
148
148
|
end
|
149
149
|
|
150
|
-
def copy(source, dest, query, per_size, scroll, dest_index)
|
150
|
+
def copy(source, dest, query, per_size, scroll, dest_index, slice_max)
|
151
151
|
source_client = Elasticsearch::Client.new hosts: source
|
152
152
|
dest_client = Elasticsearch::Client.new hosts: dest
|
153
153
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
154
|
+
parallel_array = slice_max ? slice_max.times.to_a : [0]
|
155
|
+
Parallel.map(parallel_array, :in_processes=>parallel_array.size) do |slice_id|
|
156
|
+
scroll_option = get_scroll_option(@index, query, per_size, scroll, slice_id, slice_max)
|
157
|
+
r = connect_with_retry { source_client.search(scroll_option) }
|
158
|
+
total = r['hits']['total']
|
159
|
+
i = 0
|
160
|
+
i += bulk_results(r, dest_client, i, total, dest_index, slice_id)
|
161
|
+
|
162
|
+
while r = connect_with_retry { source_client.scroll(scroll_id: r['_scroll_id'], scroll: scroll) } and
|
163
|
+
(not r['hits']['hits'].empty?) do
|
164
|
+
i += bulk_results(r, dest_client, i, total, dest_index, slice_id)
|
165
|
+
end
|
163
166
|
end
|
164
167
|
end
|
165
168
|
|
166
169
|
private
|
167
170
|
|
168
|
-
def bulk_results(results, dest_client, before_size, total, dest_index)
|
171
|
+
def bulk_results(results, dest_client, before_size, total, dest_index, slice_id)
|
169
172
|
bulk_message = convert_results(results, dest_index)
|
170
173
|
connect_with_retry do
|
171
174
|
dest_client.bulk body: bulk_message unless bulk_message.empty?
|
172
175
|
to_size = before_size + (bulk_message.size/2)
|
173
|
-
@logger.info "copy complete (#{before_size}-#{to_size})/#{total}"
|
176
|
+
@logger.info "slice_id[#{slice_id}] copy complete (#{before_size}-#{to_size})/#{total}"
|
174
177
|
end
|
175
178
|
return (bulk_message.size/2)
|
176
179
|
end
|
177
180
|
|
178
|
-
def get_scroll_option(index, query, size, scroll)
|
181
|
+
def get_scroll_option(index, query, size, scroll, slice_id, slice_max)
|
179
182
|
body = {}
|
180
183
|
body[:query] = { query_string: { query: query } } unless query.nil?
|
184
|
+
body[:slice] = { id: slice_id, max: slice_max } if slice_max
|
181
185
|
search_option = { index: index, scroll: scroll, body: body, size: (size || DEFAULT_PER) }
|
182
186
|
search_option
|
183
187
|
end
|
@@ -207,7 +211,7 @@ module Ej
|
|
207
211
|
if retries < retry_on_failure
|
208
212
|
retries += 1
|
209
213
|
@logger.warn "Could not connect to Elasticsearch, resetting connection and trying again. #{e.message}"
|
210
|
-
sleep
|
214
|
+
sleep 10**retries
|
211
215
|
retry
|
212
216
|
end
|
213
217
|
raise "Could not connect to Elasticsearch after #{retries} retries. #{e.message}"
|
data/lib/ej/util.rb
CHANGED
@@ -2,11 +2,11 @@ module Ej
|
|
2
2
|
class Util
|
3
3
|
def self.parse_json(buffer)
|
4
4
|
begin
|
5
|
-
data =
|
5
|
+
data = JSON.parse(buffer)
|
6
6
|
rescue => e
|
7
7
|
data = []
|
8
8
|
buffer.lines.each do |line|
|
9
|
-
data <<
|
9
|
+
data << JSON.parse(line)
|
10
10
|
end
|
11
11
|
end
|
12
12
|
data.class == Array ? data : [data]
|
data/lib/ej/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ej
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.11
|
4
|
+
version: 0.1.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-10-
|
11
|
+
date: 2017-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -94,20 +94,6 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: yajl-ruby
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
104
|
-
type: :runtime
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - ">="
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
111
97
|
- !ruby/object:Gem::Dependency
|
112
98
|
name: elasticsearch
|
113
99
|
requirement: !ruby/object:Gem::Requirement
|