elastic-util 0.1.2 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +5 -5
  2. data/bin/elastic-util +11 -0
  3. data/lib/elastic_util.rb +68 -30
  4. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 65b4319be21549be3a86ed896834f464f1d36542
4
- data.tar.gz: 40d05eb1c8f6db2d98b5606f14ab5fe26eda6a2d
2
+ SHA256:
3
+ metadata.gz: af7028b9a21af54dc8f66849ce7a22e9739dbf0a2b7d8d2fd4091144337b3fda
4
+ data.tar.gz: 8caf084b468de28607516b1c8d8643de9373732a5d1628db4618307f6b7dac64
5
5
  SHA512:
6
- metadata.gz: 72cfa5a35d7746aeea475972a132966b120ea9571a3176d218d5c138a13a6661085900856b02ec41a1b10dc7f886c136f4d274d5e2c4e40c33258c8539379b40
7
- data.tar.gz: 96da3e80ab3dfa74f214e6d5d27cd7ec5e3b0b2523faa664ec3894a4eea631d8d2bd005d7ef398f72f0aef09afc20395ba1ae61f52352b520f92085ff5182e1d
6
+ metadata.gz: 07ce1c430cd062388456fc8d19717546457aba8bd20952a54697d17192a36e63367d66a37f78f8fc08d48d0feb89fd5c26c53fccc32a0f5df26e65e31278bc73
7
+ data.tar.gz: bb50a5ada50644ee7edaf831af2d1966d4610515d268310084a01c0d0f278c2679c3a6757fc62ee6c6863c5f53c03842a2190335ed807037c97d7cd2eb004a38
@@ -23,6 +23,15 @@ when "backup"
23
23
  opts.on('--exclude-fields x,y,z', Array, "The fields to exclude from backup. Default is '_id'.") do |val|
24
24
  options[:exclude_fields] = val.collect {|it| it.strip }
25
25
  end
26
+ opts.on('--replace-types type1:_doc,type2:_doc', Array, "Replace certain types with a different type.") do |val|
27
+ options[:replace_types] = {}
28
+ val.each do |it|
29
+ pair = it.split(":").collect {|p| p.strip }
30
+ if pair.size == 2
31
+ options[:replace_types][pair[0]] = pair[1]
32
+ end
33
+ end
34
+ end
26
35
  opts.on( '-s', '--size NUMBER', "The size api parameter. This dicates the size of the files and api payloads. Default is 1000." ) do |val|
27
36
  options[:size] = val.to_i
28
37
  end
@@ -100,6 +109,8 @@ when "restore"
100
109
  exit 1
101
110
  end
102
111
 
112
+ when "-v"
113
+ puts ElasticUtil::VERSION
103
114
  else
104
115
  $stderr.puts usage
105
116
  exit 1
@@ -1,5 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require 'net/http'
3
+ require 'openssl'
3
4
  require 'json'
4
5
  require 'fileutils'
5
6
 
@@ -12,7 +13,7 @@ require 'fileutils'
12
13
  #
13
14
  module ElasticUtil
14
15
 
15
- VERSION = "0.1.2"
16
+ VERSION = "0.1.7"
16
17
 
17
18
  # The name of the data directory, relative to the user provided backup directory.
18
19
  DUMP_DIR = "es_data"
@@ -43,6 +44,7 @@ module ElasticUtil
43
44
  # @option opts [Array] :indices The indices to backup. Default is all.
44
45
  # @option opts [Array] :exclude_indices Exclude certain indexes.
45
46
  # @option opts [Array] :exclude_fields Exclude certain fields. Default is ['_id'].
47
+ # @option opts [Array] :replace_types Replace certain types with a different type, separated by a colon. eg. 'type1:type2' or 'stat:_doc'
46
48
  # @option opts [String] :scroll The scroll api parameter, Default is '5m'.
47
49
  # @option opts [Integer] :size The size api parameter. Default is 1000.
48
50
  # @option opts [true] :force Delete existing backup directory instead of erroring. Default is false.
@@ -52,19 +54,14 @@ module ElasticUtil
52
54
  #
53
55
  def self.backup(url, backup_dir, opts={})
54
56
  start_time = Time.now
57
+ url = url.strip.chomp("/")
55
58
  backup_dir = backup_dir.strip
56
59
  path = File.join(backup_dir.strip, DUMP_DIR)
57
60
  indices = []
58
-
59
61
  # ping it first
62
+ response = nil
60
63
  uri = URI(url)
61
- response = Net::HTTP.get_response(uri)
62
- http = Net::HTTP.new(uri.host, uri.port)
63
- http.read_timeout = 5
64
- http.open_timeout = 5
65
- response = http.start() {|http|
66
- http.get("/")
67
- }
64
+ response = api_get(uri)
68
65
  if !response.is_a?(Net::HTTPSuccess)
69
66
  raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
70
67
  end
@@ -73,8 +70,9 @@ module ElasticUtil
73
70
  if opts[:indices]
74
71
  indices = opts[:indices]
75
72
  else
73
+ response = nil
76
74
  uri = URI(url + "/_cat/indices?format=json")
77
- response = Net::HTTP.get_response(uri)
75
+ response = api_get(uri)
78
76
  if !response.is_a?(Net::HTTPSuccess)
79
77
  raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
80
78
  end
@@ -123,7 +121,7 @@ module ElasticUtil
123
121
  }
124
122
  uri.query = URI.encode_www_form(params)
125
123
  # puts "HTTP REQUEST #{uri.inspect}"
126
- response = Net::HTTP.get_response(uri)
124
+ response = api_get(uri)
127
125
  if !response.is_a?(Net::HTTPSuccess)
128
126
  raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
129
127
  end
@@ -131,7 +129,7 @@ module ElasticUtil
131
129
  raise Error, "No scroll_id returned in response:\n#{response.inspect}" unless json_response['_scroll_id']
132
130
  scroll_id = json_response['_scroll_id']
133
131
  hits = json_response['hits']['hits']
134
- save_bulk_data(path, hits)
132
+ save_bulk_data(path, hits, nil, opts)
135
133
 
136
134
  file_index = 1
137
135
  # scroll requests
@@ -143,7 +141,7 @@ module ElasticUtil
143
141
  }
144
142
  uri.query = URI.encode_www_form(params)
145
143
  # puts "HTTP REQUEST #{uri.inspect}"
146
- response = Net::HTTP.get_response(uri)
144
+ response = api_get(uri)
147
145
  if !response.is_a?(Net::HTTPSuccess)
148
146
  raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
149
147
  end
@@ -151,11 +149,7 @@ module ElasticUtil
151
149
  raise Error, "No scroll_id returned in response:\n#{response.inspect}\n#{response.body.to_s}" unless json_response['_scroll_id']
152
150
  scroll_id = json_response['_scroll_id']
153
151
  hits = json_response['hits']['hits']
154
- if file_index > 0
155
- save_bulk_data(path, hits, file_index)
156
- else
157
- save_bulk_data(path, hits)
158
- end
152
+ save_bulk_data(path, hits, file_index, opts)
159
153
  file_index += 1
160
154
  end
161
155
  end
@@ -190,14 +184,7 @@ module ElasticUtil
190
184
 
191
185
  # ping it first
192
186
  uri = URI(url)
193
- response = Net::HTTP.get_response(uri)
194
- http = Net::HTTP.new(uri.host, uri.port)
195
- http.read_timeout = 5
196
- http.open_timeout = 5
197
- response = http.start() {|http|
198
- http.get("/")
199
- }
200
-
187
+ response = api_get(uri)
201
188
  if !response.is_a?(Net::HTTPSuccess)
202
189
  raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
203
190
  end
@@ -214,9 +201,8 @@ module ElasticUtil
214
201
  found_files.each_with_index do |file, i|
215
202
  puts "(#{i+1}/#{found_files.size}) bulk importing file #{file}" unless opts[:quiet]
216
203
  payload = File.read(file)
217
- # uri = URI(url)
218
- http = Net::HTTP.new(uri.host, uri.port)
219
- response = http.post("/_bulk", payload)
204
+ uri = URI(url + "/_bulk")
205
+ response = api_post(uri, payload, {:headers => {"Content-Type" => "application/x-ndjson"} })
220
206
  if !response.is_a?(Net::HTTPSuccess)
221
207
  raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
222
208
  end
@@ -235,8 +221,12 @@ module ElasticUtil
235
221
  FileUtils.mkdir_p(dir_name)
236
222
  file_name = File.join(dir_name, index_type) + (file_index ? "_#{file_index}" : "") + ".json.data"
237
223
  # prepare record for bulk api injection
224
+ doc_type = hit['_type']
225
+ if opts[:replace_types] && opts[:replace_types][doc_type]
226
+ doc_type = opts[:replace_types][doc_type]
227
+ end
238
228
  action_json = {'index' => {
239
- '_index' => hit['_index'], '_type' => hit['_type'], '_id' => hit['_id']
229
+ '_index' => hit['_index'], '_type' => doc_type, '_id' => hit['_id']
240
230
  } }
241
231
  source_json = hit['_source']
242
232
  if opts[:exclude_fields] && source_json
@@ -244,6 +234,7 @@ module ElasticUtil
244
234
  source_json.delete(field)
245
235
  end
246
236
  end
237
+
247
238
  File.open(file_name, 'a') do |file|
248
239
  file.write JSON.generate(action_json) + "\n" + JSON.generate(source_json) + "\n"
249
240
  end
@@ -251,4 +242,51 @@ module ElasticUtil
251
242
  end
252
243
  end
253
244
 
245
+ def self.api_get(uri, opts={})
246
+ http = Net::HTTP.new(uri.host, uri.port)
247
+ if uri.scheme == 'https'
248
+ http.use_ssl = true
249
+ # todo: always ignore ssl errors for now, but this should be an option
250
+ # http.verify_mode = OpenSSL::SSL::VERIFY_PEER
251
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
252
+ end
253
+ http.read_timeout = opts[:read_timeout] || (60*15)
254
+ http.open_timeout = opts[:open_timeout] || 5
255
+ request = Net::HTTP::Get.new uri.request_uri
256
+ if opts[:headers]
257
+ opts[:headers].each do |k,v|
258
+ request[k] = v
259
+ end
260
+ end
261
+ if uri.user
262
+ request.basic_auth uri.user, uri.password
263
+ end
264
+ response = http.request(request)
265
+ return response
266
+ end
267
+
268
+ def self.api_post(uri, payload, opts={})
269
+ http = Net::HTTP.new(uri.host, uri.port)
270
+ if uri.scheme == 'https'
271
+ http.use_ssl = true
272
+ # todo: always ignore ssl errors for now, but this should be an option
273
+ # http.verify_mode = OpenSSL::SSL::VERIFY_PEER
274
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
275
+ end
276
+ http.read_timeout = opts[:read_timeout] || (60*15)
277
+ http.open_timeout = opts[:open_timeout] || 5
278
+ request = Net::HTTP::Post.new uri.request_uri
279
+ if opts[:headers]
280
+ opts[:headers].each do |k,v|
281
+ request[k] = v
282
+ end
283
+ end
284
+ if uri.user
285
+ request.basic_auth uri.user, uri.password
286
+ end
287
+ request.body = payload
288
+ response = http.request(request)
289
+ return response
290
+ end
291
+
254
292
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elastic-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Dickson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-08 00:00:00.000000000 Z
11
+ date: 2020-06-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -75,7 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
75
  version: '0'
76
76
  requirements: []
77
77
  rubyforge_project:
78
- rubygems_version: 2.4.8
78
+ rubygems_version: 2.7.6
79
79
  signing_key:
80
80
  specification_version: 4
81
81
  summary: Provides backup and restore for ElasticSearch data