ej 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6ba7f44c1f49100a84246846a99b87fa467ead32
4
- data.tar.gz: 6223584d39aea4b09bfc2b87ecf8dd5f9e0a1127
3
+ metadata.gz: 71050072d6972560642c232f88d3f04c35618496
4
+ data.tar.gz: d84942d99527ff336b81029a9d13f5f90971e904
5
5
  SHA512:
6
- metadata.gz: c167c8f5deea539ab58a5749f2bb6053603217fc5c7b227b7f34b5b77d52dba7211f0ef277bdbebb1343604435bbfca42a1ce76f5843e90951bfc4ef0814b536
7
- data.tar.gz: 638512a42e5d2ddcdaedaa19085b2d17c054c338aacaecb8b92b1f7edddc610447e0ff3b80e42ecb0f80fe1bf09b88e8998eb63e8f3f4089bd7a3b47fdd7ec26
6
+ metadata.gz: dd9a4ab8ddb693fcfe77f10a2bf0146cf74c120d23aba43c428285c6ea1fbe2979edde28ca9cccfc573eb0d27cf6305b85586856a48ae32a84153182cb6aee18
7
+ data.tar.gz: bfb96e1a5258d6fa6714259216bfa7ed6f4fb4ad8264a271a2927b28e0a121b786cf343278f7ae5616e1642e846add75faee24b6b229de6518deb0fec15c5714
data/ej.gemspec CHANGED
@@ -27,4 +27,5 @@ Gem::Specification.new do |spec|
27
27
  spec.add_runtime_dependency "yajl-ruby"
28
28
  spec.add_runtime_dependency "elasticsearch"
29
29
  spec.add_runtime_dependency "hashie"
30
+ spec.add_runtime_dependency "parallel"
30
31
  end
data/lib/ej/commands.rb CHANGED
@@ -68,8 +68,9 @@ module Ej
68
68
  option :dest, type: :string, aliases: '--dest', required: true, desc: 'dest host'
69
69
  option :query, type: :string, aliases: '-q', default: nil, desc: 'query'
70
70
  option :per, type: :numeric, default: nil, desc: 'per'
71
+ option :proc_num, type: :numeric, default: 1, desc: 'proc num for multi thread'
71
72
  def copy
72
- @core.copy(options['source'], options['dest'], options['query'], options['per'])
73
+ @core.copy(options['source'], options['dest'], options['query'], options['per'], options['proc_num'])
73
74
  end
74
75
 
75
76
  desc 'dump', 'dump index'
data/lib/ej/core.rb CHANGED
@@ -5,12 +5,15 @@ require 'yajl'
5
5
  require 'elasticsearch'
6
6
  require 'hashie'
7
7
  require 'pp'
8
+ require 'parallel'
8
9
 
9
10
  module Ej
10
11
  class Core
11
12
  DEFAULT_PER = 1000
12
13
  def initialize(host, index, debug)
13
- @logger = debug ? Logger.new($stderr) : nil
14
+ @logger = Logger.new($stderr)
15
+ @logger.level = debug ? Logger::DEBUG : Logger::INFO
16
+
14
17
  @index = index
15
18
  @client = Elasticsearch::Client.new hosts: host, logger: @logger, index: @index
16
19
  end
@@ -39,20 +42,23 @@ module Ej
39
42
  @client.search index: @index, type: type, body: body
40
43
  end
41
44
 
42
- def copy(source, dest, query, per_size)
45
+ def copy(source, dest, query, per_size, proc_num)
43
46
  per = per_size || DEFAULT_PER
44
47
  num = 0
45
48
  logger = Logger.new($stdout)
46
49
  source_client = Elasticsearch::Client.new hosts: source, index: @index
47
50
  dest_client = Elasticsearch::Client.new hosts: dest
48
- while true
51
+ calculate_body = { size: 0 }
52
+ calculate_body[:query] = { query_string: { query: query } } unless query.nil?
53
+ calculate_data = Hashie::Mash.new(source_client.search index: @index, body: calculate_body)
54
+ total = calculate_data.hits.total
55
+ payloads = ((total/per) + 1).times.to_a
56
+ Parallel.map(payloads, in_processes: proc_num) do |num|
49
57
  from = num * per
50
58
  body = { size: per, from: from }
51
59
  body[:query] = { query_string: { query: query } } unless query.nil?
52
60
  data = Hashie::Mash.new(source_client.search index: @index, body: body)
53
61
  docs = data.hits.hits
54
- total = data.hits.total
55
- break if docs.empty?
56
62
  bulk_message = []
57
63
  docs.each do |doc|
58
64
  source = doc.delete('_source')
@@ -63,9 +69,9 @@ module Ej
63
69
  bulk_message << { index: doc.to_h }
64
70
  bulk_message << source
65
71
  end
66
- dest_client.bulk body: bulk_message unless bulk_message.empty?
67
- logger.info("copy complete #{from + docs.size}/#{total}")
68
- num += 1
72
+ send_with_retry(dest_client, bulk_message)
73
+
74
+ logger.info("copy complete (#{from}-#{from + docs.size})/#{total}")
69
75
  end
70
76
  end
71
77
 
@@ -243,12 +249,27 @@ module Ej
243
249
  bulk_message << record
244
250
  end
245
251
  bulk_message.each_slice(10000).each do |block|
246
- @client.bulk body: block
252
+ send_with_retry(@client, block)
247
253
  end
248
254
  end
249
255
 
250
256
  private
251
257
 
258
+ def send_with_retry(client, bulk_message, retry_on_failure = 5)
259
+ retries = 0
260
+ begin
261
+ client.bulk body: bulk_message unless bulk_message.empty?
262
+ rescue => e
263
+ if retries < retry_on_failure
264
+ retries += 1
265
+ @logger.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
266
+ sleep 2**retries
267
+ retry
268
+ end
269
+ raise "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
270
+ end
271
+ end
272
+
252
273
  def parse_json(buffer)
253
274
  begin
254
275
  data = Yajl::Parser.parse(buffer)
data/lib/ej/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Ej
2
- VERSION = '0.1.0'
2
+ VERSION = '0.1.1'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ej
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - toyama0919
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-02 00:00:00.000000000 Z
11
+ date: 2017-03-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -136,6 +136,20 @@ dependencies:
136
136
  - - ">="
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: parallel
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
139
153
  description: elasticsearch command line utility. Returns all results in json.
140
154
  email:
141
155
  - toyama0919@gmail.com