elastic-util 0.1.2 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/bin/elastic-util +11 -0
- data/lib/elastic_util.rb +68 -30
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: af7028b9a21af54dc8f66849ce7a22e9739dbf0a2b7d8d2fd4091144337b3fda
|
4
|
+
data.tar.gz: 8caf084b468de28607516b1c8d8643de9373732a5d1628db4618307f6b7dac64
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 07ce1c430cd062388456fc8d19717546457aba8bd20952a54697d17192a36e63367d66a37f78f8fc08d48d0feb89fd5c26c53fccc32a0f5df26e65e31278bc73
|
7
|
+
data.tar.gz: bb50a5ada50644ee7edaf831af2d1966d4610515d268310084a01c0d0f278c2679c3a6757fc62ee6c6863c5f53c03842a2190335ed807037c97d7cd2eb004a38
|
data/bin/elastic-util
CHANGED
@@ -23,6 +23,15 @@ when "backup"
|
|
23
23
|
opts.on('--exclude-fields x,y,z', Array, "The fields to exclude from backup. Default is '_id'.") do |val|
|
24
24
|
options[:exclude_fields] = val.collect {|it| it.strip }
|
25
25
|
end
|
26
|
+
opts.on('--replace-types type1:_doc,type2:_doc', Array, "Replace certain types with a different type.") do |val|
|
27
|
+
options[:replace_types] = {}
|
28
|
+
val.each do |it|
|
29
|
+
pair = it.split(":").collect {|p| p.strip }
|
30
|
+
if pair.size == 2
|
31
|
+
options[:replace_types][pair[0]] = pair[1]
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
26
35
|
opts.on( '-s', '--size NUMBER', "The size api parameter. This dicates the size of the files and api payloads. Default is 1000." ) do |val|
|
27
36
|
options[:size] = val.to_i
|
28
37
|
end
|
@@ -100,6 +109,8 @@ when "restore"
|
|
100
109
|
exit 1
|
101
110
|
end
|
102
111
|
|
112
|
+
when "-v"
|
113
|
+
puts ElasticUtil::VERSION
|
103
114
|
else
|
104
115
|
$stderr.puts usage
|
105
116
|
exit 1
|
data/lib/elastic_util.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'net/http'
|
3
|
+
require 'openssl'
|
3
4
|
require 'json'
|
4
5
|
require 'fileutils'
|
5
6
|
|
@@ -12,7 +13,7 @@ require 'fileutils'
|
|
12
13
|
#
|
13
14
|
module ElasticUtil
|
14
15
|
|
15
|
-
VERSION = "0.1.2"
|
16
|
+
VERSION = "0.1.7"
|
16
17
|
|
17
18
|
# The name of the data directory, relative to the user provided backup directory.
|
18
19
|
DUMP_DIR = "es_data"
|
@@ -43,6 +44,7 @@ module ElasticUtil
|
|
43
44
|
# @option opts [Array] :indices The indices to backup. Default is all.
|
44
45
|
# @option opts [Array] :exclude_indices Exclude certain indexes.
|
45
46
|
# @option opts [Array] :exclude_fields Exclude certain fields. Default is ['_id'].
|
47
|
+
# @option opts [Hash] :replace_types Replace certain types with a different type, as a map of original type to replacement type. On the command line each pair is separated by a colon. eg. 'type1:type2' or 'stat:_doc'
|
46
48
|
# @option opts [String] :scroll The scroll api parameter, Default is '5m'.
|
47
49
|
# @option opts [Integer] :size The size api parameter. Default is 1000.
|
48
50
|
# @option opts [true] :force Delete existing backup directory instead of erroring. Default is false.
|
@@ -52,19 +54,14 @@ module ElasticUtil
|
|
52
54
|
#
|
53
55
|
def self.backup(url, backup_dir, opts={})
|
54
56
|
start_time = Time.now
|
57
|
+
url = url.strip.chomp("/")
|
55
58
|
backup_dir = backup_dir.strip
|
56
59
|
path = File.join(backup_dir.strip, DUMP_DIR)
|
57
60
|
indices = []
|
58
|
-
|
59
61
|
# ping it first
|
62
|
+
response = nil
|
60
63
|
uri = URI(url)
|
61
|
-
response =
|
62
|
-
http = Net::HTTP.new(uri.host, uri.port)
|
63
|
-
http.read_timeout = 5
|
64
|
-
http.open_timeout = 5
|
65
|
-
response = http.start() {|http|
|
66
|
-
http.get("/")
|
67
|
-
}
|
64
|
+
response = api_get(uri)
|
68
65
|
if !response.is_a?(Net::HTTPSuccess)
|
69
66
|
raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
|
70
67
|
end
|
@@ -73,8 +70,9 @@ module ElasticUtil
|
|
73
70
|
if opts[:indices]
|
74
71
|
indices = opts[:indices]
|
75
72
|
else
|
73
|
+
response = nil
|
76
74
|
uri = URI(url + "/_cat/indices?format=json")
|
77
|
-
response =
|
75
|
+
response = api_get(uri)
|
78
76
|
if !response.is_a?(Net::HTTPSuccess)
|
79
77
|
raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
|
80
78
|
end
|
@@ -123,7 +121,7 @@ module ElasticUtil
|
|
123
121
|
}
|
124
122
|
uri.query = URI.encode_www_form(params)
|
125
123
|
# puts "HTTP REQUEST #{uri.inspect}"
|
126
|
-
response =
|
124
|
+
response = api_get(uri)
|
127
125
|
if !response.is_a?(Net::HTTPSuccess)
|
128
126
|
raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
|
129
127
|
end
|
@@ -131,7 +129,7 @@ module ElasticUtil
|
|
131
129
|
raise Error, "No scroll_id returned in response:\n#{response.inspect}" unless json_response['_scroll_id']
|
132
130
|
scroll_id = json_response['_scroll_id']
|
133
131
|
hits = json_response['hits']['hits']
|
134
|
-
save_bulk_data(path, hits)
|
132
|
+
save_bulk_data(path, hits, nil, opts)
|
135
133
|
|
136
134
|
file_index = 1
|
137
135
|
# scroll requests
|
@@ -143,7 +141,7 @@ module ElasticUtil
|
|
143
141
|
}
|
144
142
|
uri.query = URI.encode_www_form(params)
|
145
143
|
# puts "HTTP REQUEST #{uri.inspect}"
|
146
|
-
response =
|
144
|
+
response = api_get(uri)
|
147
145
|
if !response.is_a?(Net::HTTPSuccess)
|
148
146
|
raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
|
149
147
|
end
|
@@ -151,11 +149,7 @@ module ElasticUtil
|
|
151
149
|
raise Error, "No scroll_id returned in response:\n#{response.inspect}\n#{response.body.to_s}" unless json_response['_scroll_id']
|
152
150
|
scroll_id = json_response['_scroll_id']
|
153
151
|
hits = json_response['hits']['hits']
|
154
|
-
|
155
|
-
save_bulk_data(path, hits, file_index)
|
156
|
-
else
|
157
|
-
save_bulk_data(path, hits)
|
158
|
-
end
|
152
|
+
save_bulk_data(path, hits, file_index, opts)
|
159
153
|
file_index += 1
|
160
154
|
end
|
161
155
|
end
|
@@ -190,14 +184,7 @@ module ElasticUtil
|
|
190
184
|
|
191
185
|
# ping it first
|
192
186
|
uri = URI(url)
|
193
|
-
response =
|
194
|
-
http = Net::HTTP.new(uri.host, uri.port)
|
195
|
-
http.read_timeout = 5
|
196
|
-
http.open_timeout = 5
|
197
|
-
response = http.start() {|http|
|
198
|
-
http.get("/")
|
199
|
-
}
|
200
|
-
|
187
|
+
response = api_get(uri)
|
201
188
|
if !response.is_a?(Net::HTTPSuccess)
|
202
189
|
raise Error, "Unable to reach elasticsearch at url '#{url}'!\n#{response.inspect}\n#{response.body.to_s}"
|
203
190
|
end
|
@@ -214,9 +201,8 @@ module ElasticUtil
|
|
214
201
|
found_files.each_with_index do |file, i|
|
215
202
|
puts "(#{i+1}/#{found_files.size}) bulk importing file #{file}" unless opts[:quiet]
|
216
203
|
payload = File.read(file)
|
217
|
-
|
218
|
-
|
219
|
-
response = http.post("/_bulk", payload)
|
204
|
+
uri = URI(url + "/_bulk")
|
205
|
+
response = api_post(uri, payload, {:headers => {"Content-Type" => "application/x-ndjson"} })
|
220
206
|
if !response.is_a?(Net::HTTPSuccess)
|
221
207
|
raise Error, "HTTP request failure!\n#{response.inspect}\n#{response.body.to_s}"
|
222
208
|
end
|
@@ -235,8 +221,12 @@ module ElasticUtil
|
|
235
221
|
FileUtils.mkdir_p(dir_name)
|
236
222
|
file_name = File.join(dir_name, index_type) + (file_index ? "_#{file_index}" : "") + ".json.data"
|
237
223
|
# prepare record for bulk api injection
|
224
|
+
doc_type = hit['_type']
|
225
|
+
if opts[:replace_types] && opts[:replace_types][doc_type]
|
226
|
+
doc_type = opts[:replace_types][doc_type]
|
227
|
+
end
|
238
228
|
action_json = {'index' => {
|
239
|
-
'_index' => hit['_index'], '_type' =>
|
229
|
+
'_index' => hit['_index'], '_type' => doc_type, '_id' => hit['_id']
|
240
230
|
} }
|
241
231
|
source_json = hit['_source']
|
242
232
|
if opts[:exclude_fields] && source_json
|
@@ -244,6 +234,7 @@ module ElasticUtil
|
|
244
234
|
source_json.delete(field)
|
245
235
|
end
|
246
236
|
end
|
237
|
+
|
247
238
|
File.open(file_name, 'a') do |file|
|
248
239
|
file.write JSON.generate(action_json) + "\n" + JSON.generate(source_json) + "\n"
|
249
240
|
end
|
@@ -251,4 +242,51 @@ module ElasticUtil
|
|
251
242
|
end
|
252
243
|
end
|
253
244
|
|
245
|
+
def self.api_get(uri, opts={})
|
246
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
247
|
+
if uri.scheme == 'https'
|
248
|
+
http.use_ssl = true
|
249
|
+
# todo: always ignore ssl errors for now, but this should be an option
|
250
|
+
# http.verify_mode = OpenSSL::SSL::VERIFY_PEER
|
251
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
252
|
+
end
|
253
|
+
http.read_timeout = opts[:read_timeout] || (60*15)
|
254
|
+
http.open_timeout = opts[:open_timeout] || 5
|
255
|
+
request = Net::HTTP::Get.new uri.request_uri
|
256
|
+
if opts[:headers]
|
257
|
+
opts[:headers].each do |k,v|
|
258
|
+
request[k] = v
|
259
|
+
end
|
260
|
+
end
|
261
|
+
if uri.user
|
262
|
+
request.basic_auth uri.user, uri.password
|
263
|
+
end
|
264
|
+
response = http.request(request)
|
265
|
+
return response
|
266
|
+
end
|
267
|
+
|
268
|
+
def self.api_post(uri, payload, opts={})
|
269
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
270
|
+
if uri.scheme == 'https'
|
271
|
+
http.use_ssl = true
|
272
|
+
# todo: always ignore ssl errors for now, but this should be an option
|
273
|
+
# http.verify_mode = OpenSSL::SSL::VERIFY_PEER
|
274
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
275
|
+
end
|
276
|
+
http.read_timeout = opts[:read_timeout] || (60*15)
|
277
|
+
http.open_timeout = opts[:open_timeout] || 5
|
278
|
+
request = Net::HTTP::Post.new uri.request_uri
|
279
|
+
if opts[:headers]
|
280
|
+
opts[:headers].each do |k,v|
|
281
|
+
request[k] = v
|
282
|
+
end
|
283
|
+
end
|
284
|
+
if uri.user
|
285
|
+
request.basic_auth uri.user, uri.password
|
286
|
+
end
|
287
|
+
request.body = payload
|
288
|
+
response = http.request(request)
|
289
|
+
return response
|
290
|
+
end
|
291
|
+
|
254
292
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elastic-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Dickson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -75,7 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
75
|
version: '0'
|
76
76
|
requirements: []
|
77
77
|
rubyforge_project:
|
78
|
-
rubygems_version: 2.
|
78
|
+
rubygems_version: 2.7.6
|
79
79
|
signing_key:
|
80
80
|
specification_version: 4
|
81
81
|
summary: Provides backup and restore for ElasticSearch data
|