ej 0.1.0 → 0.1.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ej.gemspec +1 -0
- data/lib/ej/commands.rb +2 -1
- data/lib/ej/core.rb +30 -9
- data/lib/ej/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71050072d6972560642c232f88d3f04c35618496
|
4
|
+
data.tar.gz: d84942d99527ff336b81029a9d13f5f90971e904
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd9a4ab8ddb693fcfe77f10a2bf0146cf74c120d23aba43c428285c6ea1fbe2979edde28ca9cccfc573eb0d27cf6305b85586856a48ae32a84153182cb6aee18
|
7
|
+
data.tar.gz: bfb96e1a5258d6fa6714259216bfa7ed6f4fb4ad8264a271a2927b28e0a121b786cf343278f7ae5616e1642e846add75faee24b6b229de6518deb0fec15c5714
|
data/ej.gemspec
CHANGED
data/lib/ej/commands.rb
CHANGED
@@ -68,8 +68,9 @@ module Ej
|
|
68
68
|
option :dest, type: :string, aliases: '--dest', required: true, desc: 'dest host'
|
69
69
|
option :query, type: :string, aliases: '-q', default: nil, desc: 'query'
|
70
70
|
option :per, type: :numeric, default: nil, desc: 'per'
|
71
|
+
option :proc_num, type: :numeric, default: 1, desc: 'proc num for multi thread'
|
71
72
|
def copy
|
72
|
-
@core.copy(options['source'], options['dest'], options['query'], options['per'])
|
73
|
+
@core.copy(options['source'], options['dest'], options['query'], options['per'], options['proc_num'])
|
73
74
|
end
|
74
75
|
|
75
76
|
desc 'dump', 'dump index'
|
data/lib/ej/core.rb
CHANGED
@@ -5,12 +5,15 @@ require 'yajl'
|
|
5
5
|
require 'elasticsearch'
|
6
6
|
require 'hashie'
|
7
7
|
require 'pp'
|
8
|
+
require 'parallel'
|
8
9
|
|
9
10
|
module Ej
|
10
11
|
class Core
|
11
12
|
DEFAULT_PER = 1000
|
12
13
|
def initialize(host, index, debug)
|
13
|
-
@logger =
|
14
|
+
@logger = Logger.new($stderr)
|
15
|
+
@logger.level = debug ? Logger::DEBUG : Logger::INFO
|
16
|
+
|
14
17
|
@index = index
|
15
18
|
@client = Elasticsearch::Client.new hosts: host, logger: @logger, index: @index
|
16
19
|
end
|
@@ -39,20 +42,23 @@ module Ej
|
|
39
42
|
@client.search index: @index, type: type, body: body
|
40
43
|
end
|
41
44
|
|
42
|
-
def copy(source, dest, query, per_size)
|
45
|
+
def copy(source, dest, query, per_size, proc_num)
|
43
46
|
per = per_size || DEFAULT_PER
|
44
47
|
num = 0
|
45
48
|
logger = Logger.new($stdout)
|
46
49
|
source_client = Elasticsearch::Client.new hosts: source, index: @index
|
47
50
|
dest_client = Elasticsearch::Client.new hosts: dest
|
48
|
-
|
51
|
+
calculate_body = { size: 0 }
|
52
|
+
calculate_body[:query] = { query_string: { query: query } } unless query.nil?
|
53
|
+
calculate_data = Hashie::Mash.new(source_client.search index: @index, body: calculate_body)
|
54
|
+
total = calculate_data.hits.total
|
55
|
+
payloads = ((total/per) + 1).times.to_a
|
56
|
+
Parallel.map(payloads, in_processes: proc_num) do |num|
|
49
57
|
from = num * per
|
50
58
|
body = { size: per, from: from }
|
51
59
|
body[:query] = { query_string: { query: query } } unless query.nil?
|
52
60
|
data = Hashie::Mash.new(source_client.search index: @index, body: body)
|
53
61
|
docs = data.hits.hits
|
54
|
-
total = data.hits.total
|
55
|
-
break if docs.empty?
|
56
62
|
bulk_message = []
|
57
63
|
docs.each do |doc|
|
58
64
|
source = doc.delete('_source')
|
@@ -63,9 +69,9 @@ module Ej
|
|
63
69
|
bulk_message << { index: doc.to_h }
|
64
70
|
bulk_message << source
|
65
71
|
end
|
66
|
-
dest_client
|
67
|
-
|
68
|
-
|
72
|
+
send_with_retry(dest_client, bulk_message)
|
73
|
+
|
74
|
+
logger.info("copy complete (#{from}-#{from + docs.size})/#{total}")
|
69
75
|
end
|
70
76
|
end
|
71
77
|
|
@@ -243,12 +249,27 @@ module Ej
|
|
243
249
|
bulk_message << record
|
244
250
|
end
|
245
251
|
bulk_message.each_slice(10000).each do |block|
|
246
|
-
@client
|
252
|
+
send_with_retry(@client, block)
|
247
253
|
end
|
248
254
|
end
|
249
255
|
|
250
256
|
private
|
251
257
|
|
258
|
+
def send_with_retry(client, bulk_message, retry_on_failure = 5)
|
259
|
+
retries = 0
|
260
|
+
begin
|
261
|
+
client.bulk body: bulk_message unless bulk_message.empty?
|
262
|
+
rescue => e
|
263
|
+
if retries < retry_on_failure
|
264
|
+
retries += 1
|
265
|
+
@logger.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
|
266
|
+
sleep 2**retries
|
267
|
+
retry
|
268
|
+
end
|
269
|
+
raise "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
252
273
|
def parse_json(buffer)
|
253
274
|
begin
|
254
275
|
data = Yajl::Parser.parse(buffer)
|
data/lib/ej/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ej
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -136,6 +136,20 @@ dependencies:
|
|
136
136
|
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: parallel
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :runtime
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
139
153
|
description: elasticsearch command line utility. Returns all results in json.
|
140
154
|
email:
|
141
155
|
- toyama0919@gmail.com
|