elastic-util 0.1.2 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +5 -5
  2. data/bin/elastic-util +11 -0
  3. data/lib/elastic_util.rb +68 -30
  4. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 65b4319be21549be3a86ed896834f464f1d36542
4
- data.tar.gz: 40d05eb1c8f6db2d98b5606f14ab5fe26eda6a2d
2
+ SHA256:
3
+ metadata.gz: af7028b9a21af54dc8f66849ce7a22e9739dbf0a2b7d8d2fd4091144337b3fda
4
+ data.tar.gz: 8caf084b468de28607516b1c8d8643de9373732a5d1628db4618307f6b7dac64
5
5
  SHA512:
6
- metadata.gz: 72cfa5a35d7746aeea475972a132966b120ea9571a3176d218d5c138a13a6661085900856b02ec41a1b10dc7f886c136f4d274d5e2c4e40c33258c8539379b40
7
- data.tar.gz: 96da3e80ab3dfa74f214e6d5d27cd7ec5e3b0b2523faa664ec3894a4eea631d8d2bd005d7ef398f72f0aef09afc20395ba1ae61f52352b520f92085ff5182e1d
6
+ metadata.gz: 07ce1c430cd062388456fc8d19717546457aba8bd20952a54697d17192a36e63367d66a37f78f8fc08d48d0feb89fd5c26c53fccc32a0f5df26e65e31278bc73
7
+ data.tar.gz: bb50a5ada50644ee7edaf831af2d1966d4610515d268310084a01c0d0f278c2679c3a6757fc62ee6c6863c5f53c03842a2190335ed807037c97d7cd2eb004a38
@@ -23,6 +23,15 @@ when "backup"
23
23
  opts.on('--exclude-fields x,y,z', Array, "The fields to exclude from backup. Default is '_id'.") do |val|
24
24
  options[:exclude_fields] = val.collect {|it| it.strip }
25
25
  end
26
+ opts.on('--replace-types type1:_doc,type2:_doc', Array, "Replace certain types with a different type.") do |val|
27
+ options[:replace_types] = {}
28
+ val.each do |it|
29
+ pair = it.split(":").collect {|p| p.strip }
30
+ if pair.size == 2
31
+ options[:replace_types][pair[0]] = pair[1]
32
+ end
33
+ end
34
+ end
26
35
  opts.on( '-s', '--size NUMBER', "The size api parameter. This dicates the size of the files and api payloads. Default is 1000." ) do |val|
27
36
  options[:size] = val.to_i
28
37
  end
@@ -100,6 +109,8 @@ when "restore"
100
109
  exit 1
101
110
  end
102
111
 
112
+ when "-v"
113
+ puts ElasticUtil::VERSION
103
114
  else
104
115
  $stderr.puts usage
105
116
  exit 1
@@ -1,5 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require 'net/http'
3
+ require 'openssl'
3
4
  require 'json'
4
5
  require 'fileutils'
5
6
 
@@ -12,7 +13,7 @@ require 'fileutils'
12
13
  #
13
14
  module ElasticUtil
14
15
 
15
- VERSION = "0.1.2"
16
+ VERSION = "0.1.7"
16
17
 
17
18
  # The name of the data directory, relative to the user provided backup directory.
18
19
  DUMP_DIR = "es_data"
@@ -43,6 +44,7 @@ module ElasticUtil
43
44
  # @option opts [Array] :indices The indices to backup. Default is all.
44
45
  # @option opts [Array] :exclude_indices Exclude certain indexes.
45
46
  # @option opts [Array] :exclude_fields Exclude certain fields. Default is ['_id'].
47
+ # @option opts [Array] :replace_types Replace certain types with a different type, separated by a colon. eg. 'type1:type2' or 'stat:_doc'
46
48
  # @option opts [String] :scroll The scroll api parameter, Default is '5m'.
47
49
  # @option opts [Integer] :size The size api parameter. Default is 1000.
48
50
  # @option opts [true] :force Delete existing backup directory instead of erroring. Default is false.
@@ -52,19 +54,14 @@ module ElasticUtil
52
54
  #
53
55
  def self.backup(url, backup_dir, opts={})
54
56
  start_time = Time.now
57
+ url = url.strip.chomp("/")
55
58
  backup_dir = backup_dir.strip
56
59
  path = File.join(backup_dir.strip, DUMP_DIR)
57
60
  indices = []
58
-
59
61
  # ping it first
62
+ response = nil
60
63
  uri = URI(url)
61
- response = Net::HTTP.get_response(uri)
62
- http = Net::HTTP.new(uri.host, uri.port)
63
- http.read_timeout = 5
64
- http.open_timeout = 5
65
- response = http.start() {|http|
66
- http.get("/")
67
- }
64
+ response = api_get(uri)
68
65
  if !response.is_a?(Net::HTTPSuccess)
69
66
  raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
70
67
  end
@@ -73,8 +70,9 @@ module ElasticUtil
73
70
  if opts[:indices]
74
71
  indices = opts[:indices]
75
72
  else
73
+ response = nil
76
74
  uri = URI(url + "/_cat/indices?format=json")
77
- response = Net::HTTP.get_response(uri)
75
+ response = api_get(uri)
78
76
  if !response.is_a?(Net::HTTPSuccess)
79
77
  raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
80
78
  end
@@ -123,7 +121,7 @@ module ElasticUtil
123
121
  }
124
122
  uri.query = URI.encode_www_form(params)
125
123
  # puts "HTTP REQUEST #{uri.inspect}"
126
- response = Net::HTTP.get_response(uri)
124
+ response = api_get(uri)
127
125
  if !response.is_a?(Net::HTTPSuccess)
128
126
  raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
129
127
  end
@@ -131,7 +129,7 @@ module ElasticUtil
131
129
  raise Error, "No scroll_id returned in response:\n#{response.inspect}" unless json_response['_scroll_id']
132
130
  scroll_id = json_response['_scroll_id']
133
131
  hits = json_response['hits']['hits']
134
- save_bulk_data(path, hits)
132
+ save_bulk_data(path, hits, nil, opts)
135
133
 
136
134
  file_index = 1
137
135
  # scroll requests
@@ -143,7 +141,7 @@ module ElasticUtil
143
141
  }
144
142
  uri.query = URI.encode_www_form(params)
145
143
  # puts "HTTP REQUEST #{uri.inspect}"
146
- response = Net::HTTP.get_response(uri)
144
+ response = api_get(uri)
147
145
  if !response.is_a?(Net::HTTPSuccess)
148
146
  raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
149
147
  end
@@ -151,11 +149,7 @@ module ElasticUtil
151
149
  raise Error, "No scroll_id returned in response:\n#{response.inspect}\n#{response.body.to_s}" unless json_response['_scroll_id']
152
150
  scroll_id = json_response['_scroll_id']
153
151
  hits = json_response['hits']['hits']
154
- if file_index > 0
155
- save_bulk_data(path, hits, file_index)
156
- else
157
- save_bulk_data(path, hits)
158
- end
152
+ save_bulk_data(path, hits, file_index, opts)
159
153
  file_index += 1
160
154
  end
161
155
  end
@@ -190,14 +184,7 @@ module ElasticUtil
190
184
 
191
185
  # ping it first
192
186
  uri = URI(url)
193
- response = Net::HTTP.get_response(uri)
194
- http = Net::HTTP.new(uri.host, uri.port)
195
- http.read_timeout = 5
196
- http.open_timeout = 5
197
- response = http.start() {|http|
198
- http.get("/")
199
- }
200
-
187
+ response = api_get(uri)
201
188
  if !response.is_a?(Net::HTTPSuccess)
202
189
  raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
203
190
  end
@@ -214,9 +201,8 @@ module ElasticUtil
214
201
  found_files.each_with_index do |file, i|
215
202
  puts "(#{i+1}/#{found_files.size}) bulk importing file #{file}" unless opts[:quiet]
216
203
  payload = File.read(file)
217
- # uri = URI(url)
218
- http = Net::HTTP.new(uri.host, uri.port)
219
- response = http.post("/_bulk", payload)
204
+ uri = URI(url + "/_bulk")
205
+ response = api_post(uri, payload, {:headers => {"Content-Type" => "application/x-ndjson"} })
220
206
  if !response.is_a?(Net::HTTPSuccess)
221
207
  raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
222
208
  end
@@ -235,8 +221,12 @@ module ElasticUtil
235
221
  FileUtils.mkdir_p(dir_name)
236
222
  file_name = File.join(dir_name, index_type) + (file_index ? "_#{file_index}" : "") + ".json.data"
237
223
  # prepare record for bulk api injection
224
+ doc_type = hit['_type']
225
+ if opts[:replace_types] && opts[:replace_types][doc_type]
226
+ doc_type = opts[:replace_types][doc_type]
227
+ end
238
228
  action_json = {'index' => {
239
- '_index' => hit['_index'], '_type' => hit['_type'], '_id' => hit['_id']
229
+ '_index' => hit['_index'], '_type' => doc_type, '_id' => hit['_id']
240
230
  } }
241
231
  source_json = hit['_source']
242
232
  if opts[:exclude_fields] && source_json
@@ -244,6 +234,7 @@ module ElasticUtil
244
234
  source_json.delete(field)
245
235
  end
246
236
  end
237
+
247
238
  File.open(file_name, 'a') do |file|
248
239
  file.write JSON.generate(action_json) + "\n" + JSON.generate(source_json) + "\n"
249
240
  end
@@ -251,4 +242,51 @@ module ElasticUtil
251
242
  end
252
243
  end
253
244
 
245
+ def self.api_get(uri, opts={})
246
+ http = Net::HTTP.new(uri.host, uri.port)
247
+ if uri.scheme == 'https'
248
+ http.use_ssl = true
249
+ # todo: always ignore ssl errors for now, but this should be an option
250
+ # http.verify_mode = OpenSSL::SSL::VERIFY_PEER
251
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
252
+ end
253
+ http.read_timeout = opts[:read_timeout] || (60*15)
254
+ http.open_timeout = opts[:open_timeout] || 5
255
+ request = Net::HTTP::Get.new uri.request_uri
256
+ if opts[:headers]
257
+ opts[:headers].each do |k,v|
258
+ request[k] = v
259
+ end
260
+ end
261
+ if uri.user
262
+ request.basic_auth uri.user, uri.password
263
+ end
264
+ response = http.request(request)
265
+ return response
266
+ end
267
+
268
+ def self.api_post(uri, payload, opts={})
269
+ http = Net::HTTP.new(uri.host, uri.port)
270
+ if uri.scheme == 'https'
271
+ http.use_ssl = true
272
+ # todo: always ignore ssl errors for now, but this should be an option
273
+ # http.verify_mode = OpenSSL::SSL::VERIFY_PEER
274
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
275
+ end
276
+ http.read_timeout = opts[:read_timeout] || (60*15)
277
+ http.open_timeout = opts[:open_timeout] || 5
278
+ request = Net::HTTP::Post.new uri.request_uri
279
+ if opts[:headers]
280
+ opts[:headers].each do |k,v|
281
+ request[k] = v
282
+ end
283
+ end
284
+ if uri.user
285
+ request.basic_auth uri.user, uri.password
286
+ end
287
+ request.body = payload
288
+ response = http.request(request)
289
+ return response
290
+ end
291
+
254
292
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elastic-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Dickson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-08 00:00:00.000000000 Z
11
+ date: 2020-06-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -75,7 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
75
  version: '0'
76
76
  requirements: []
77
77
  rubyforge_project:
78
- rubygems_version: 2.4.8
78
+ rubygems_version: 2.7.6
79
79
  signing_key:
80
80
  specification_version: 4
81
81
  summary: Provides backup and restore for ElasticSearch data