ej 0.1.11 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e76e6b6fc5c6bebf55f97553afc843cbb9d9b730
4
- data.tar.gz: '07830f36bd93af33e7686d264ef25dcf5b34d10b'
3
+ metadata.gz: ee1f79efd243a83e80d9b4ace0f58213ceee0c76
4
+ data.tar.gz: bc86ffef97ad3a387c332a735437c661f6825970
5
5
  SHA512:
6
- metadata.gz: 96e69fe5ee1ba6ae3ecb8c53030b6710379bd312f8bef4ffcc5cea5d5e0281bfc3f11478b743d37122fcf01c5d06fe5476f4103805d18105a77f5a30c1926458
7
- data.tar.gz: 9788d2673c94ac4f30084fc7951f660484268e440934a45495bfbe12ff1aeaa503708ff1af36c3b96d74b0539e75ed0df15816668d2602a9c36fde4b81aac3b5
6
+ metadata.gz: 4a1074107c6d568c830ee1153d5adf0b294bcaf7c408ee45117cd4a05b0a09504aa4d362b9f88d6b4bab4e916eb8d29687bbd327681ce8e258b94002cf144ee9
7
+ data.tar.gz: 5b627b9a60eb8cc01194795bb63aa99a9e28c03151b3964288a1bf18f763ae8f418f18706dbb8c95815e33598ca2b75bcd9731058d2a81bf2749fb29f814a2a6
data/ej.gemspec CHANGED
@@ -24,7 +24,6 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "pry-doc"
25
25
  spec.add_development_dependency 'rspec', '~> 2.4'
26
26
  spec.add_runtime_dependency "thor"
27
- spec.add_runtime_dependency "yajl-ruby"
28
27
  spec.add_runtime_dependency "elasticsearch"
29
28
  spec.add_runtime_dependency "hashie"
30
29
  spec.add_runtime_dependency "parallel"
data/lib/ej/commands.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'ej/core'
2
2
  require 'thor'
3
- require 'yajl'
3
+ require 'json'
4
4
 
5
5
  module Ej
6
6
  class Commands < Thor
@@ -71,6 +71,7 @@ module Ej
71
71
  option :per, type: :numeric, default: nil, desc: 'per'
72
72
  option :scroll, type: :string, default: "1m", desc: 'scroll'
73
73
  option :dest_index, type: :string, desc: 'dest index'
74
+ option :slice_max, type: :numeric, desc: 'slice max'
74
75
  def copy
75
76
  @core.copy(
76
77
  options[:source],
@@ -78,7 +79,8 @@ module Ej
78
79
  options[:query],
79
80
  options[:per],
80
81
  options[:scroll],
81
- options[:dest_index]
82
+ options[:dest_index],
83
+ options[:slice_max]
82
84
  )
83
85
  end
84
86
 
@@ -171,7 +173,7 @@ module Ej
171
173
  desc 'not_analyzed', 'not analyzed'
172
174
  def not_analyzed
173
175
  json = File.read(File.expand_path('../../../template/not_analyze_template.json', __FILE__))
174
- hash = Yajl::Parser.parse(json)
176
+ hash = JSON.parse(json)
175
177
  puts_with_format(@indices.put_template('ej_init', hash))
176
178
  end
177
179
 
@@ -186,7 +188,7 @@ module Ej
186
188
 
187
189
  desc 'put_template', 'put template'
188
190
  def put_template(name)
189
- hash = Yajl::Parser.parse(STDIN.read)
191
+ hash = JSON.parse(STDIN.read)
190
192
  puts_with_format(@indices.put_template(name, hash))
191
193
  end
192
194
 
@@ -255,7 +257,7 @@ module Ej
255
257
  private
256
258
 
257
259
  def puts_with_format(object)
258
- puts Yajl::Encoder.encode(object, pretty: true)
260
+ puts JSON.pretty_generate(object)
259
261
  end
260
262
 
261
263
  end
data/lib/ej/core.rb CHANGED
@@ -1,4 +1,4 @@
1
- require 'yajl'
1
+ require 'json'
2
2
  require 'elasticsearch'
3
3
  require 'hashie'
4
4
  require 'parallel'
@@ -54,8 +54,8 @@ module Ej
54
54
  docs.each do |doc|
55
55
  source = doc.delete('_source')
56
56
  doc.delete('_score')
57
- bulk_message << Yajl::Encoder.encode({ 'index' => doc.to_h })
58
- bulk_message << Yajl::Encoder.encode(source)
57
+ bulk_message << JSON.dump({ 'index' => doc.to_h })
58
+ bulk_message << JSON.dump(source)
59
59
  end
60
60
  num += 1
61
61
  puts bulk_message.join("\n")
@@ -147,37 +147,41 @@ module Ej
147
147
  connect_with_retry { @client.bulk body: bulk_message unless bulk_message.empty? }
148
148
  end
149
149
 
150
- def copy(source, dest, query, per_size, scroll, dest_index)
150
+ def copy(source, dest, query, per_size, scroll, dest_index, slice_max)
151
151
  source_client = Elasticsearch::Client.new hosts: source
152
152
  dest_client = Elasticsearch::Client.new hosts: dest
153
153
 
154
- scroll_option = get_scroll_option(@index, query, per_size, scroll)
155
- r = connect_with_retry { source_client.search(scroll_option) }
156
- total = r['hits']['total']
157
- i = 0
158
- i += bulk_results(r, dest_client, i, total, dest_index)
159
-
160
- while r = connect_with_retry { source_client.scroll(scroll_id: r['_scroll_id'], scroll: scroll) } and
161
- (not r['hits']['hits'].empty?) do
162
- i += bulk_results(r, dest_client, i, total, dest_index)
154
+ parallel_array = slice_max ? slice_max.times.to_a : [0]
155
+ Parallel.map(parallel_array, :in_processes=>parallel_array.size) do |slice_id|
156
+ scroll_option = get_scroll_option(@index, query, per_size, scroll, slice_id, slice_max)
157
+ r = connect_with_retry { source_client.search(scroll_option) }
158
+ total = r['hits']['total']
159
+ i = 0
160
+ i += bulk_results(r, dest_client, i, total, dest_index, slice_id)
161
+
162
+ while r = connect_with_retry { source_client.scroll(scroll_id: r['_scroll_id'], scroll: scroll) } and
163
+ (not r['hits']['hits'].empty?) do
164
+ i += bulk_results(r, dest_client, i, total, dest_index, slice_id)
165
+ end
163
166
  end
164
167
  end
165
168
 
166
169
  private
167
170
 
168
- def bulk_results(results, dest_client, before_size, total, dest_index)
171
+ def bulk_results(results, dest_client, before_size, total, dest_index, slice_id)
169
172
  bulk_message = convert_results(results, dest_index)
170
173
  connect_with_retry do
171
174
  dest_client.bulk body: bulk_message unless bulk_message.empty?
172
175
  to_size = before_size + (bulk_message.size/2)
173
- @logger.info "copy complete (#{before_size}-#{to_size})/#{total}"
176
+ @logger.info "slice_id[#{slice_id}] copy complete (#{before_size}-#{to_size})/#{total}"
174
177
  end
175
178
  return (bulk_message.size/2)
176
179
  end
177
180
 
178
- def get_scroll_option(index, query, size, scroll)
181
+ def get_scroll_option(index, query, size, scroll, slice_id, slice_max)
179
182
  body = {}
180
183
  body[:query] = { query_string: { query: query } } unless query.nil?
184
+ body[:slice] = { id: slice_id, max: slice_max } if slice_max
181
185
  search_option = { index: index, scroll: scroll, body: body, size: (size || DEFAULT_PER) }
182
186
  search_option
183
187
  end
@@ -207,7 +211,7 @@ module Ej
207
211
  if retries < retry_on_failure
208
212
  retries += 1
209
213
  @logger.warn "Could not connect to Elasticsearch, resetting connection and trying again. #{e.message}"
210
- sleep 2**retries
214
+ sleep 10**retries
211
215
  retry
212
216
  end
213
217
  raise "Could not connect to Elasticsearch after #{retries} retries. #{e.message}"
data/lib/ej/util.rb CHANGED
@@ -2,11 +2,11 @@ module Ej
2
2
  class Util
3
3
  def self.parse_json(buffer)
4
4
  begin
5
- data = Yajl::Parser.parse(buffer)
5
+ data = JSON.parse(buffer)
6
6
  rescue => e
7
7
  data = []
8
8
  buffer.lines.each do |line|
9
- data << Yajl::Parser.parse(line)
9
+ data << JSON.parse(line)
10
10
  end
11
11
  end
12
12
  data.class == Array ? data : [data]
data/lib/ej/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Ej
2
- VERSION = '0.1.11'
2
+ VERSION = '0.1.12'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ej
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - toyama0919
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-06 00:00:00.000000000 Z
11
+ date: 2017-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -94,20 +94,6 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: yajl-ruby
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :runtime
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
97
  - !ruby/object:Gem::Dependency
112
98
  name: elasticsearch
113
99
  requirement: !ruby/object:Gem::Requirement