elastic-util 0.1.2 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/elastic-util +11 -0
- data/lib/elastic_util.rb +68 -30
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: af7028b9a21af54dc8f66849ce7a22e9739dbf0a2b7d8d2fd4091144337b3fda
|
4
|
+
data.tar.gz: 8caf084b468de28607516b1c8d8643de9373732a5d1628db4618307f6b7dac64
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 07ce1c430cd062388456fc8d19717546457aba8bd20952a54697d17192a36e63367d66a37f78f8fc08d48d0feb89fd5c26c53fccc32a0f5df26e65e31278bc73
|
7
|
+
data.tar.gz: bb50a5ada50644ee7edaf831af2d1966d4610515d268310084a01c0d0f278c2679c3a6757fc62ee6c6863c5f53c03842a2190335ed807037c97d7cd2eb004a38
|
data/bin/elastic-util
CHANGED
@@ -23,6 +23,15 @@ when "backup"
|
|
23
23
|
opts.on('--exclude-fields x,y,z', Array, "The fields to exclude from backup. Default is '_id'.") do |val|
|
24
24
|
options[:exclude_fields] = val.collect {|it| it.strip }
|
25
25
|
end
|
26
|
+
opts.on('--replace-types type1:_doc,type2:_doc', Array, "Replace certain types with a different type.") do |val|
|
27
|
+
options[:replace_types] = {}
|
28
|
+
val.each do |it|
|
29
|
+
pair = it.split(":").collect {|p| p.strip }
|
30
|
+
if pair.size == 2
|
31
|
+
options[:replace_types][pair[0]] = pair[1]
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
26
35
|
opts.on( '-s', '--size NUMBER', "The size api parameter. This dicates the size of the files and api payloads. Default is 1000." ) do |val|
|
27
36
|
options[:size] = val.to_i
|
28
37
|
end
|
@@ -100,6 +109,8 @@ when "restore"
|
|
100
109
|
exit 1
|
101
110
|
end
|
102
111
|
|
112
|
+
when "-v"
|
113
|
+
puts ElasticUtil::VERSION
|
103
114
|
else
|
104
115
|
$stderr.puts usage
|
105
116
|
exit 1
|
data/lib/elastic_util.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'net/http'
|
3
|
+
require 'openssl'
|
3
4
|
require 'json'
|
4
5
|
require 'fileutils'
|
5
6
|
|
@@ -12,7 +13,7 @@ require 'fileutils'
|
|
12
13
|
#
|
13
14
|
module ElasticUtil
|
14
15
|
|
15
|
-
VERSION = "0.1.2"
|
16
|
+
VERSION = "0.1.7"
|
16
17
|
|
17
18
|
# The name of the data directory, relative to the user provided backup directory.
|
18
19
|
DUMP_DIR = "es_data"
|
@@ -43,6 +44,7 @@ module ElasticUtil
|
|
43
44
|
# @option opts [Array] :indices The indices to backup. Default is all.
|
44
45
|
# @option opts [Array] :exclude_indices Exclude certain indexes.
|
45
46
|
# @option opts [Array] :exclude_fields Exclude certain fields. Default is ['_id'].
|
47
|
+
# @option opts [Array] :replace_types Replace certain types with a different type, separated by a colon. eg. 'type1:type2' or 'stat:_doc'
|
46
48
|
# @option opts [String] :scroll The scroll api parameter, Default is '5m'.
|
47
49
|
# @option opts [Integer] :size The size api parameter. Default is 1000.
|
48
50
|
# @option opts [true] :force Delete existing backup directory instead of erroring. Default is false.
|
@@ -52,19 +54,14 @@ module ElasticUtil
|
|
52
54
|
#
|
53
55
|
def self.backup(url, backup_dir, opts={})
|
54
56
|
start_time = Time.now
|
57
|
+
url = url.strip.chomp("/")
|
55
58
|
backup_dir = backup_dir.strip
|
56
59
|
path = File.join(backup_dir.strip, DUMP_DIR)
|
57
60
|
indices = []
|
58
|
-
|
59
61
|
# ping it first
|
62
|
+
response = nil
|
60
63
|
uri = URI(url)
|
61
|
-
response =
|
62
|
-
http = Net::HTTP.new(uri.host, uri.port)
|
63
|
-
http.read_timeout = 5
|
64
|
-
http.open_timeout = 5
|
65
|
-
response = http.start() {|http|
|
66
|
-
http.get("/")
|
67
|
-
}
|
64
|
+
response = api_get(uri)
|
68
65
|
if !response.is_a?(Net::HTTPSuccess)
|
69
66
|
raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
|
70
67
|
end
|
@@ -73,8 +70,9 @@ module ElasticUtil
|
|
73
70
|
if opts[:indices]
|
74
71
|
indices = opts[:indices]
|
75
72
|
else
|
73
|
+
response = nil
|
76
74
|
uri = URI(url + "/_cat/indices?format=json")
|
77
|
-
response =
|
75
|
+
response = api_get(uri)
|
78
76
|
if !response.is_a?(Net::HTTPSuccess)
|
79
77
|
raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
|
80
78
|
end
|
@@ -123,7 +121,7 @@ module ElasticUtil
|
|
123
121
|
}
|
124
122
|
uri.query = URI.encode_www_form(params)
|
125
123
|
# puts "HTTP REQUEST #{uri.inspect}"
|
126
|
-
response =
|
124
|
+
response = api_get(uri)
|
127
125
|
if !response.is_a?(Net::HTTPSuccess)
|
128
126
|
raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
|
129
127
|
end
|
@@ -131,7 +129,7 @@ module ElasticUtil
|
|
131
129
|
raise Error, "No scroll_id returned in response:\n#{response.inspect}" unless json_response['_scroll_id']
|
132
130
|
scroll_id = json_response['_scroll_id']
|
133
131
|
hits = json_response['hits']['hits']
|
134
|
-
save_bulk_data(path, hits)
|
132
|
+
save_bulk_data(path, hits, nil, opts)
|
135
133
|
|
136
134
|
file_index = 1
|
137
135
|
# scroll requests
|
@@ -143,7 +141,7 @@ module ElasticUtil
|
|
143
141
|
}
|
144
142
|
uri.query = URI.encode_www_form(params)
|
145
143
|
# puts "HTTP REQUEST #{uri.inspect}"
|
146
|
-
response =
|
144
|
+
response = api_get(uri)
|
147
145
|
if !response.is_a?(Net::HTTPSuccess)
|
148
146
|
raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
|
149
147
|
end
|
@@ -151,11 +149,7 @@ module ElasticUtil
|
|
151
149
|
raise Error, "No scroll_id returned in response:\n#{response.inspect}\n#{response.body.to_s}" unless json_response['_scroll_id']
|
152
150
|
scroll_id = json_response['_scroll_id']
|
153
151
|
hits = json_response['hits']['hits']
|
154
|
-
|
155
|
-
save_bulk_data(path, hits, file_index)
|
156
|
-
else
|
157
|
-
save_bulk_data(path, hits)
|
158
|
-
end
|
152
|
+
save_bulk_data(path, hits, file_index, opts)
|
159
153
|
file_index += 1
|
160
154
|
end
|
161
155
|
end
|
@@ -190,14 +184,7 @@ module ElasticUtil
|
|
190
184
|
|
191
185
|
# ping it first
|
192
186
|
uri = URI(url)
|
193
|
-
response =
|
194
|
-
http = Net::HTTP.new(uri.host, uri.port)
|
195
|
-
http.read_timeout = 5
|
196
|
-
http.open_timeout = 5
|
197
|
-
response = http.start() {|http|
|
198
|
-
http.get("/")
|
199
|
-
}
|
200
|
-
|
187
|
+
response = api_get(uri)
|
201
188
|
if !response.is_a?(Net::HTTPSuccess)
|
202
189
|
raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
|
203
190
|
end
|
@@ -214,9 +201,8 @@ module ElasticUtil
|
|
214
201
|
found_files.each_with_index do |file, i|
|
215
202
|
puts "(#{i+1}/#{found_files.size}) bulk importing file #{file}" unless opts[:quiet]
|
216
203
|
payload = File.read(file)
|
217
|
-
|
218
|
-
|
219
|
-
response = http.post("/_bulk", payload)
|
204
|
+
uri = URI(url + "/_bulk")
|
205
|
+
response = api_post(uri, payload, {:headers => {"Content-Type" => "application/x-ndjson"} })
|
220
206
|
if !response.is_a?(Net::HTTPSuccess)
|
221
207
|
raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
|
222
208
|
end
|
@@ -235,8 +221,12 @@ module ElasticUtil
|
|
235
221
|
FileUtils.mkdir_p(dir_name)
|
236
222
|
file_name = File.join(dir_name, index_type) + (file_index ? "_#{file_index}" : "") + ".json.data"
|
237
223
|
# prepare record for bulk api injection
|
224
|
+
doc_type = hit['_type']
|
225
|
+
if opts[:replace_types] && opts[:replace_types][doc_type]
|
226
|
+
doc_type = opts[:replace_types][doc_type]
|
227
|
+
end
|
238
228
|
action_json = {'index' => {
|
239
|
-
'_index' => hit['_index'], '_type' =>
|
229
|
+
'_index' => hit['_index'], '_type' => doc_type, '_id' => hit['_id']
|
240
230
|
} }
|
241
231
|
source_json = hit['_source']
|
242
232
|
if opts[:exclude_fields] && source_json
|
@@ -244,6 +234,7 @@ module ElasticUtil
|
|
244
234
|
source_json.delete(field)
|
245
235
|
end
|
246
236
|
end
|
237
|
+
|
247
238
|
File.open(file_name, 'a') do |file|
|
248
239
|
file.write JSON.generate(action_json) + "\n" + JSON.generate(source_json) + "\n"
|
249
240
|
end
|
@@ -251,4 +242,51 @@ module ElasticUtil
|
|
251
242
|
end
|
252
243
|
end
|
253
244
|
|
245
|
+
def self.api_get(uri, opts={})
|
246
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
247
|
+
if uri.scheme == 'https'
|
248
|
+
http.use_ssl = true
|
249
|
+
# todo: always ignore ssl errors for now, but this should be an option
|
250
|
+
# http.verify_mode = OpenSSL::SSL::VERIFY_PEER
|
251
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
252
|
+
end
|
253
|
+
http.read_timeout = opts[:read_timeout] || (60*15)
|
254
|
+
http.open_timeout = opts[:open_timeout] || 5
|
255
|
+
request = Net::HTTP::Get.new uri.request_uri
|
256
|
+
if opts[:headers]
|
257
|
+
opts[:headers].each do |k,v|
|
258
|
+
request[k] = v
|
259
|
+
end
|
260
|
+
end
|
261
|
+
if uri.user
|
262
|
+
request.basic_auth uri.user, uri.password
|
263
|
+
end
|
264
|
+
response = http.request(request)
|
265
|
+
return response
|
266
|
+
end
|
267
|
+
|
268
|
+
def self.api_post(uri, payload, opts={})
|
269
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
270
|
+
if uri.scheme == 'https'
|
271
|
+
http.use_ssl = true
|
272
|
+
# todo: always ignore ssl errors for now, but this should be an option
|
273
|
+
# http.verify_mode = OpenSSL::SSL::VERIFY_PEER
|
274
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
275
|
+
end
|
276
|
+
http.read_timeout = opts[:read_timeout] || (60*15)
|
277
|
+
http.open_timeout = opts[:open_timeout] || 5
|
278
|
+
request = Net::HTTP::Post.new uri.request_uri
|
279
|
+
if opts[:headers]
|
280
|
+
opts[:headers].each do |k,v|
|
281
|
+
request[k] = v
|
282
|
+
end
|
283
|
+
end
|
284
|
+
if uri.user
|
285
|
+
request.basic_auth uri.user, uri.password
|
286
|
+
end
|
287
|
+
request.body = payload
|
288
|
+
response = http.request(request)
|
289
|
+
return response
|
290
|
+
end
|
291
|
+
|
254
292
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elastic-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Dickson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -75,7 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
75
|
version: '0'
|
76
76
|
requirements: []
|
77
77
|
rubyforge_project:
|
78
|
-
rubygems_version: 2.
|
78
|
+
rubygems_version: 2.7.6
|
79
79
|
signing_key:
|
80
80
|
specification_version: 4
|
81
81
|
summary: Provides backup and restore for ElasticSearch data
|