elasticsearch_s3_backup 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +3 -3
- data/elasticsearch_s3_backup.gemspec +5 -1
- data/lib/elasticsearch_s3_backup.rb +92 -213
- data/lib/elasticsearch_s3_backup/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4080ff7b9f8bac6212fe0d240806c810259e6e1
|
4
|
+
data.tar.gz: 9669329c6836926d87e08494e1fc9b7c5f596186
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 183937a0de6ea3ad69f945cd484e9e50f3613ece45bac331685d145aa3faf028547e17f8064f5ada11987147f7c27ff70a6b56544ac4d9411af0263183221988
|
7
|
+
data.tar.gz: f81624dffe286fc624630e2c31c439e8f8d01de7d170c3b4e0652803abe3f21410a0f72a9454f92049537469fc4fa9f70e6621869052f828d29f69d6de4c5b1e
|
data/Rakefile
CHANGED
data/elasticsearch_s3_backup.gemspec
CHANGED
@@ -23,7 +23,10 @@ Gem::Specification.new do |spec|
|
|
23
23
|
'pushes.'
|
24
24
|
end
|
25
25
|
|
26
|
-
spec.files
|
26
|
+
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
27
|
+
f.match(%r{^(test|spec|features)/})
|
28
|
+
end
|
29
|
+
|
27
30
|
spec.bindir = 'exe'
|
28
31
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
29
32
|
spec.require_paths = ['lib']
|
@@ -37,4 +40,5 @@ Gem::Specification.new do |spec|
|
|
37
40
|
spec.add_dependency 'faker'
|
38
41
|
spec.add_dependency 'pagerduty'
|
39
42
|
spec.add_dependency 'sentry-raven'
|
43
|
+
spec.add_dependency 'elasticsearch'
|
40
44
|
end
|
data/lib/elasticsearch_s3_backup.rb
CHANGED
@@ -2,160 +2,111 @@ require 'elasticsearch_s3_backup/version'
|
|
2
2
|
require 'active_support/time'
|
3
3
|
require 'unirest'
|
4
4
|
require 'logger'
|
5
|
-
require 'faker'
|
6
5
|
require 'pagerduty'
|
7
6
|
require 'yaml'
|
8
7
|
require 'sentry-raven'
|
8
|
+
require 'ostruct'
|
9
|
+
require 'elasticsearch'
|
9
10
|
|
10
11
|
module EverTools
|
11
12
|
class ElasticsearchS3Backup
|
12
|
-
|
13
|
-
@conf ||= YAML.load_file('/etc/s3_backup.yml')
|
14
|
-
end
|
15
|
-
|
16
|
-
def pagerduty
|
17
|
-
@pagerduty ||= Pagerduty.new conf['pagerduty_api_key']
|
18
|
-
end
|
13
|
+
extend Forwardable
|
19
14
|
|
20
|
-
|
21
|
-
|
22
|
-
|
15
|
+
def_delegators :@conf,
|
16
|
+
:pagerduty_api_key,
|
17
|
+
:test_size,
|
18
|
+
:new_repo_params,
|
19
|
+
:sentry_dsn,
|
20
|
+
:node_name,
|
21
|
+
:elasticsearch_auth_file,
|
22
|
+
:cluster_name
|
23
23
|
|
24
|
-
|
25
|
-
@auth ||= File.read(conf['elasticsearch_auth_file']).strip.split ':'
|
26
|
-
end
|
24
|
+
attr_reader :conf, :backup_repo
|
27
25
|
|
28
26
|
def initialize
|
29
|
-
|
30
|
-
|
31
|
-
Unirest.timeout 30
|
32
|
-
|
33
|
-
@url = 'http://localhost:9200'
|
27
|
+
@conf = OpenStruct.new(YAML.load_file('/etc/s3_backup.yml'))
|
28
|
+
Raven.configure { config.dsn = sentry_dsn } if sentry_dsn
|
34
29
|
|
35
|
-
|
36
|
-
@
|
37
|
-
|
38
|
-
|
39
|
-
@
|
40
|
-
@datetime = now.strftime '%m-%d_%H%M'
|
41
|
-
|
42
|
-
@monthly_snap_url = "#{@url}/_snapshot/#{@monthly}"
|
43
|
-
|
44
|
-
@backup_timeout = 1.hours
|
30
|
+
now = Time.new.utc
|
31
|
+
@backup_test_index = "backup_test_#{now.to_i}"
|
32
|
+
@restore_test_index = "restore_test_#{now.to_i}"
|
33
|
+
@backup_repo = now.strftime '%m-%Y'
|
34
|
+
@snapshot_label = now.strftime '%m-%d_%H%M'
|
45
35
|
end
|
46
36
|
|
47
37
|
def logger
|
48
|
-
|
49
|
-
@logger ||= Logger.new(conf['log']).tap do |l|
|
50
|
-
l.level = Logger::INFO
|
51
|
-
l.progname = 's3_backup'
|
52
|
-
l.formatter =
|
53
|
-
proc do |severity, datetime, progname, msg|
|
54
|
-
"#{datetime.utc} [#{progname}] #{severity}: #{msg}\n"
|
55
|
-
end
|
56
|
-
end
|
38
|
+
@logger ||= Logger.new(conf['log']).tap { |l| l.progname = 's3_backup' }
|
57
39
|
end
|
58
40
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
"#{r.body}\n"
|
71
|
-
return r
|
72
|
-
end
|
73
|
-
# byebug
|
74
|
-
fail "#{method.upcase} request to #{uri} failed (params: #{params.inspect})\n" \
|
75
|
-
"Response code: #{r.code}\n" \
|
76
|
-
"Body:\n" \
|
77
|
-
"#{r.body}\n"
|
78
|
-
rescue RuntimeError => e
|
79
|
-
tries -= 1
|
80
|
-
retry if e.message == 'Request Timeout' && tries > 0
|
81
|
-
raise e
|
41
|
+
def pagerduty
|
42
|
+
@pagerduty ||= Pagerduty.new pagerduty_api_key
|
43
|
+
end
|
44
|
+
|
45
|
+
def notify(e)
|
46
|
+
if conf['env'] == 'prod'
|
47
|
+
pagerduty.trigger(
|
48
|
+
'prod Elasticsearch S3 failed',
|
49
|
+
client: node_name,
|
50
|
+
details: "#{e.message}\n\n#{e.backtrace}"
|
51
|
+
)
|
82
52
|
end
|
53
|
+
|
54
|
+
Raven.capture_exception(e) if sentry_dsn
|
83
55
|
end
|
84
|
-
# rubocop:enable Metrics/AbcSize
|
85
56
|
|
86
|
-
def
|
87
|
-
|
57
|
+
def auth
|
58
|
+
File.read(elasticsearch_auth_file).strip
|
88
59
|
end
|
89
60
|
|
90
|
-
|
91
|
-
|
92
|
-
|
61
|
+
def es_api
|
62
|
+
@es_api ||= begin
|
63
|
+
es_host = @conf['es_host'] || 'localhost'
|
64
|
+
Elasticsearch::Client.new URI "http://#{auth}@#{es_host}:9200"
|
65
|
+
end
|
93
66
|
end
|
94
67
|
|
95
|
-
|
96
|
-
|
97
|
-
es_api(:get, @monthly_snap_url).code == 200
|
68
|
+
def master?
|
69
|
+
es_api.nodes.info['nodes'][es_api.cluster.state['master_node']]['node'] == node_name
|
98
70
|
end
|
99
71
|
|
100
|
-
def
|
101
|
-
|
102
|
-
'prod Elasticsearch S3 failed',
|
103
|
-
client: conf['node_name'],
|
104
|
-
details: "#{e.message}\n\n#{e.backtrace}"
|
105
|
-
)
|
72
|
+
def pseudo_random_string
|
73
|
+
'a' + rand(10**100).to_s
|
106
74
|
end
|
107
75
|
|
108
76
|
def insert_test_data
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
conf['test_size'].times do |i|
|
117
|
-
es_api(
|
118
|
-
:put,
|
119
|
-
"#{@url}/#{@backup_index}/dummy/#{i}",
|
120
|
-
parameters: {
|
121
|
-
test_value: Faker::Bitcoin.address
|
122
|
-
}.to_json
|
77
|
+
logger.info 'Generating test data using math…'
|
78
|
+
test_size.times do |i|
|
79
|
+
es_api.create(
|
80
|
+
index: @backup_test_index,
|
81
|
+
type: 'dummy',
|
82
|
+
id: i,
|
83
|
+
body: { test_value: pseudo_random_string }
|
123
84
|
)
|
124
85
|
end
|
125
86
|
end
|
126
87
|
|
127
|
-
def delete_index(index)
|
128
|
-
logger.info "Deleting index: #{index}"
|
129
|
-
es_api(
|
130
|
-
:delete,
|
131
|
-
"#{@url}/#{index}",
|
132
|
-
auth: { user: auth.first, password: auth.last }
|
133
|
-
)
|
134
|
-
end
|
135
|
-
|
136
88
|
def create_repo
|
137
|
-
new_repo_params =
|
89
|
+
new_repo_params = new_repo_params.merge(
|
138
90
|
type: 's3',
|
139
91
|
settings: {
|
140
|
-
base_path: "/elasticsearch/#{cluster_name}/#{conf['env']}/#{@
|
92
|
+
base_path: "/elasticsearch/#{cluster_name}/#{conf['env']}/#{@backup_repo}",
|
141
93
|
server_side_encryption: true
|
142
94
|
}
|
143
95
|
)
|
144
96
|
|
145
97
|
logger.info 'Creating a new monthly ES backup repo…'
|
146
|
-
es_api
|
147
|
-
|
148
|
-
@monthly_snap_url,
|
149
|
-
parameters: new_repo_params.to_json
|
150
|
-
)
|
98
|
+
es_api.snapshot.create_repository repository: @backup_repo,
|
99
|
+
body: new_repo_params
|
151
100
|
end
|
152
101
|
|
153
102
|
def valid_date?(date)
|
103
|
+
# rubocop:disable Style/RescueModifier
|
154
104
|
Time.strptime(date, '%m-%Y') rescue false
|
105
|
+
# rubocop:enable Style/RescueModifier
|
155
106
|
end
|
156
107
|
|
157
108
|
def dated_repos
|
158
|
-
es_api
|
109
|
+
es_api.snapshot.get_repository.keys.select { |r| valid_date? r }
|
159
110
|
end
|
160
111
|
|
161
112
|
def remove_expired_backups
|
@@ -163,106 +114,42 @@ module EverTools
|
|
163
114
|
logger.info "Removing backups older than #{3.months.ago.strftime '%m-%Y'}"
|
164
115
|
dated_repos.select { |b| Time.strptime(b, '%m-%Y') < 3.months.ago }.each do |repo|
|
165
116
|
logger.info "Removing #{repo}"
|
166
|
-
es_api :
|
117
|
+
es_api.snapshot.delete_repository repository: repo
|
167
118
|
end
|
168
119
|
end
|
169
120
|
|
170
|
-
|
171
|
-
def snapshot
|
172
|
-
backup_uri = "#{@monthly_snap_url}/#{@datetime}"
|
173
|
-
status_req = es_api :get, "#{backup_uri}/_status"
|
174
|
-
if status_req.code == 404 ||
|
175
|
-
status_req.body['snapshots'].empty?
|
176
|
-
fail "Could not find the backup I just created (#{backup_uri})"
|
177
|
-
end
|
178
|
-
snapshot = status_req.body['snapshots'].first
|
179
|
-
logger.info "Backup state: #{snapshot['state']} " \
|
180
|
-
"(finished shard #{snapshot['shards_stats']['done']} of " \
|
181
|
-
"#{snapshot['shards_stats']['total']})"
|
182
|
-
end
|
183
|
-
# rubocop:enable Metrics/AbcSize
|
184
|
-
|
185
|
-
def backup_complete?
|
186
|
-
case snapshot['state']
|
187
|
-
when 'IN_PROGRESS', 'STARTED'
|
188
|
-
return false
|
189
|
-
when 'SUCCESS'
|
190
|
-
return true
|
191
|
-
end
|
192
|
-
fail "Backup failed!\n" \
|
193
|
-
"State: #{snapshot['state']}\n" \
|
194
|
-
"Response Body: #{status_req.body}"
|
195
|
-
end
|
196
|
-
|
197
|
-
def verify_create!
|
198
|
-
# Check the status of the backup for up to an hour
|
199
|
-
backup_start_time = Time.now.utc
|
200
|
-
until Time.now.utc > (backup_start_time + @backup_timeout)
|
201
|
-
return true if backup_complete?
|
202
|
-
# Don't hammer the status endpoint
|
203
|
-
sleep 15
|
204
|
-
end
|
205
|
-
|
206
|
-
fail 'Create timed out'
|
207
|
-
end
|
208
|
-
|
209
|
-
def make_new_backup
|
121
|
+
def create_snapshot
|
210
122
|
# Make a backup (full on new month, incremental otherwise)
|
211
|
-
logger.info "Starting a new backup (#{
|
212
|
-
es_api :
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
123
|
+
logger.info "Starting a new backup (#{backup_repo}/#{@snapshot_label})…"
|
124
|
+
r = es_api.snapshot.create repository: @backup_repo,
|
125
|
+
snapshot: @snapshot_label,
|
126
|
+
wait_for_completion: true
|
127
|
+
logger.info 'Snapshot complete. Time: ' \
|
128
|
+
"#{r['snapshot']['duration_in_millis']}. " \
|
129
|
+
"Results: #{r['snapshot']['shards'].inspect}"
|
130
|
+
fail "Snapshot failed! #{r.inspect}" if r['snapshot']['failures'].any?
|
218
131
|
end
|
219
132
|
|
220
133
|
def restore_test_index
|
221
134
|
# Restore just the backup_test index to a new index
|
222
135
|
logger.info 'Restoring the backup_test index…'
|
223
|
-
es_api
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
)
|
232
|
-
verify_restored_index!
|
233
|
-
end
|
234
|
-
|
235
|
-
def index_shards(index)
|
236
|
-
r = es_api(:get, "#{@url}/#{index}/_status")
|
237
|
-
fail "Index #{index} not found" if r.code == 404
|
238
|
-
r.body.fetch('indices', {}).fetch(index, {})['shards']
|
239
|
-
end
|
240
|
-
|
241
|
-
def index_online?(index)
|
242
|
-
shards = index_shards(index)
|
243
|
-
shards && shards.select { |_k, v| v.find { |n| n['state'] != 'STARTED' } }.empty?
|
136
|
+
es_api.snapshot.restore repository: @backup_repo,
|
137
|
+
snapshot: @snapshot_label,
|
138
|
+
wait_for_completion: true,
|
139
|
+
body: {
|
140
|
+
indices: @backup_test_index,
|
141
|
+
rename_pattern: @backup_test_index,
|
142
|
+
rename_replacement: @restore_test_index
|
143
|
+
}
|
244
144
|
end
|
245
145
|
|
246
146
|
def index_item(index, id)
|
247
|
-
es_api(:
|
248
|
-
end
|
249
|
-
|
250
|
-
def wait_for_index(index)
|
251
|
-
until index_online? index
|
252
|
-
logger.info 'Waiting for restored index to be available…'
|
253
|
-
sleep 1
|
254
|
-
end
|
147
|
+
es_api.get(index: index, type: 'dummy', id: id)['_source']['test_value']
|
255
148
|
end
|
256
149
|
|
257
150
|
def compare_index_item!(i)
|
258
|
-
|
259
|
-
|
260
|
-
logger.info 'Waiting for restored index to be available…'
|
261
|
-
sleep 1
|
262
|
-
end
|
263
|
-
|
264
|
-
backup_item = index_item(@backup_index, i)['_source']['test_value']
|
265
|
-
restore_item = index_item(@restore_index, i)['_source']['test_value']
|
151
|
+
backup_item = index_item(@backup_test_index, i)
|
152
|
+
restore_item = index_item(@restore_test_index, i)
|
266
153
|
|
267
154
|
(backup_item == restore_item) ||
|
268
155
|
fail("Item #{i} in test restore doesn’t match.\n" \
|
@@ -270,45 +157,37 @@ module EverTools
|
|
270
157
|
"Restored: #{restore_item}")
|
271
158
|
end
|
272
159
|
|
273
|
-
def
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
wait_for_index @restore_index
|
278
|
-
|
279
|
-
conf['test_size'].times { |i| compare_index_item! i }
|
280
|
-
|
281
|
-
logger.info 'Successfully verified the test data!'
|
160
|
+
def delete_test_indexes
|
161
|
+
[@restore_test_index, @backup_test_index].each do |test_index|
|
162
|
+
es_api.indices.delete index: test_index
|
163
|
+
end
|
282
164
|
end
|
283
165
|
|
284
166
|
# rubocop:disable Metrics/AbcSize, Lint/RescueException
|
285
167
|
def run
|
286
168
|
unless master?
|
287
|
-
logger.info 'This node is not the currently elected master
|
288
|
-
'backup.'
|
169
|
+
logger.info 'This node is not the currently elected master. Exiting.'
|
289
170
|
exit 0
|
290
171
|
end
|
291
172
|
|
292
|
-
configure_sentry!
|
293
|
-
|
294
|
-
# Remove the previous `restore_test` index, to avoid a race condition
|
295
|
-
# with checking the restored copy of this index
|
296
|
-
delete_index @restore_index if index? @restore_index
|
297
|
-
|
298
173
|
insert_test_data
|
299
174
|
|
300
175
|
# Create a new repo if none exists (typically at beginning of month)
|
301
|
-
create_repo unless
|
302
|
-
|
176
|
+
create_repo unless es_api.snapshot.get_repository[backup_repo]
|
177
|
+
create_snapshot
|
303
178
|
restore_test_index
|
179
|
+
# Compare each doc in the original backup_test index to the restored index
|
180
|
+
logger.info "Verifying the newly-restored #{@backup_test_index}…"
|
181
|
+
test_size.times { |i| compare_index_item! i }
|
182
|
+
logger.info 'Successfully verified the test data!'
|
183
|
+
delete_test_indexes
|
304
184
|
|
305
185
|
remove_expired_backups
|
306
186
|
logger.info 'Finished'
|
307
187
|
rescue Exception => e # Need to rescue "Exception" so that Sentry gets it
|
308
|
-
notify e
|
188
|
+
notify e
|
309
189
|
logger.fatal e.message
|
310
190
|
logger.fatal e.backtrace
|
311
|
-
Raven.capture_exception(e)
|
312
191
|
raise e
|
313
192
|
end
|
314
193
|
# rubocop:enable Metrics/AbcSize, Lint/RescueException
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticsearch_s3_backup
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Herot
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-08-
|
11
|
+
date: 2015-08-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: elasticsearch
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
125
139
|
description: ''
|
126
140
|
email:
|
127
141
|
- eric.github@herot.com
|