elasticsearch_s3_backup 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1a2652e8865ae1bae268503eacb635d2ce586592
4
- data.tar.gz: 11c55edfc00bdc8abb85aff511c939fe0eb4d9e6
3
+ metadata.gz: f4080ff7b9f8bac6212fe0d240806c810259e6e1
4
+ data.tar.gz: 9669329c6836926d87e08494e1fc9b7c5f596186
5
5
  SHA512:
6
- metadata.gz: e7e337aa678f99dba4ff872761039f15fa511c6d7246d955f562d52a46889e83bd0789ae15ef841419c6f2f8f424c0a757b1c829c0992683050fdf077a85f8fb
7
- data.tar.gz: c27135eab4c5871cee1d48b0ed6f12f516040bf32049d6ac29dd017db3816223f14d1c91d4286052d03b68abfca4fc04ebe4fb259ad27737441396c024975234
6
+ metadata.gz: 183937a0de6ea3ad69f945cd484e9e50f3613ece45bac331685d145aa3faf028547e17f8064f5ada11987147f7c27ff70a6b56544ac4d9411af0263183221988
7
+ data.tar.gz: f81624dffe286fc624630e2c31c439e8f8d01de7d170c3b4e0652803abe3f21410a0f72a9454f92049537469fc4fa9f70e6621869052f828d29f69d6de4c5b1e
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
@@ -23,7 +23,10 @@ Gem::Specification.new do |spec|
23
23
  'pushes.'
24
24
  end
25
25
 
26
- spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
26
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
27
+ f.match(%r{^(test|spec|features)/})
28
+ end
29
+
27
30
  spec.bindir = 'exe'
28
31
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
29
32
  spec.require_paths = ['lib']
@@ -37,4 +40,5 @@ Gem::Specification.new do |spec|
37
40
  spec.add_dependency 'faker'
38
41
  spec.add_dependency 'pagerduty'
39
42
  spec.add_dependency 'sentry-raven'
43
+ spec.add_dependency 'elasticsearch'
40
44
  end
@@ -2,160 +2,111 @@ require 'elasticsearch_s3_backup/version'
2
2
  require 'active_support/time'
3
3
  require 'unirest'
4
4
  require 'logger'
5
- require 'faker'
6
5
  require 'pagerduty'
7
6
  require 'yaml'
8
7
  require 'sentry-raven'
8
+ require 'ostruct'
9
+ require 'elasticsearch'
9
10
 
10
11
  module EverTools
11
12
  class ElasticsearchS3Backup
12
- def conf
13
- @conf ||= YAML.load_file('/etc/s3_backup.yml')
14
- end
15
-
16
- def pagerduty
17
- @pagerduty ||= Pagerduty.new conf['pagerduty_api_key']
18
- end
13
+ extend Forwardable
19
14
 
20
- def configure_sentry!
21
- Raven.configure { config.dsn = conf['sentry_dsn'] }
22
- end
15
+ def_delegators :@conf,
16
+ :pagerduty_api_key,
17
+ :test_size,
18
+ :new_repo_params,
19
+ :sentry_dsn,
20
+ :node_name,
21
+ :elasticsearch_auth_file,
22
+ :cluster_name
23
23
 
24
- def auth
25
- @auth ||= File.read(conf['elasticsearch_auth_file']).strip.split ':'
26
- end
24
+ attr_reader :conf, :backup_repo
27
25
 
28
26
  def initialize
29
- Unirest.default_header 'Accept', 'application/json'
30
- Unirest.default_header 'Content-Type', 'application/json'
31
- Unirest.timeout 30
32
-
33
- @url = 'http://localhost:9200'
27
+ @conf = OpenStruct.new(YAML.load_file('/etc/s3_backup.yml'))
28
+ Raven.configure { config.dsn = sentry_dsn } if sentry_dsn
34
29
 
35
- @backup_index = 'backup_test'
36
- @restore_index = 'restore_test'
37
-
38
- now = Time.new.utc
39
- @monthly = now.strftime '%m-%Y'
40
- @datetime = now.strftime '%m-%d_%H%M'
41
-
42
- @monthly_snap_url = "#{@url}/_snapshot/#{@monthly}"
43
-
44
- @backup_timeout = 1.hours
30
+ now = Time.new.utc
31
+ @backup_test_index = "backup_test_#{now.to_i}"
32
+ @restore_test_index = "restore_test_#{now.to_i}"
33
+ @backup_repo = now.strftime '%m-%Y'
34
+ @snapshot_label = now.strftime '%m-%d_%H%M'
45
35
  end
46
36
 
47
37
  def logger
48
- # Set up logging
49
- @logger ||= Logger.new(conf['log']).tap do |l|
50
- l.level = Logger::INFO
51
- l.progname = 's3_backup'
52
- l.formatter =
53
- proc do |severity, datetime, progname, msg|
54
- "#{datetime.utc} [#{progname}] #{severity}: #{msg}\n"
55
- end
56
- end
38
+ @logger ||= Logger.new(conf['log']).tap { |l| l.progname = 's3_backup' }
57
39
  end
58
40
 
59
- # rubocop:disable Metrics/AbcSize
60
- def es_api(method, uri, params = {})
61
- tries = 3
62
- begin
63
- r = Unirest.send(method, uri, params)
64
- case r.code
65
- when 200..299
66
- return r
67
- when 400..499
68
- logger.debug "#{method} request to #{uri} received #{r.code} (params: #{params.inspect})\n" \
69
- "Body:\n" \
70
- "#{r.body}\n"
71
- return r
72
- end
73
- # byebug
74
- fail "#{method.upcase} request to #{uri} failed (params: #{params.inspect})\n" \
75
- "Response code: #{r.code}\n" \
76
- "Body:\n" \
77
- "#{r.body}\n"
78
- rescue RuntimeError => e
79
- tries -= 1
80
- retry if e.message == 'Request Timeout' && tries > 0
81
- raise e
41
+ def pagerduty
42
+ @pagerduty ||= Pagerduty.new pagerduty_api_key
43
+ end
44
+
45
+ def notify(e)
46
+ if conf['env'] == 'prod'
47
+ pagerduty.trigger(
48
+ 'prod Elasticsearch S3 failed',
49
+ client: node_name,
50
+ details: "#{e.message}\n\n#{e.backtrace}"
51
+ )
82
52
  end
53
+
54
+ Raven.capture_exception(e) if sentry_dsn
83
55
  end
84
- # rubocop:enable Metrics/AbcSize
85
56
 
86
- def master?
87
- es_api(:get, "#{@url}/_cat/master").body[0]['node'] == conf['node_name']
57
+ def auth
58
+ File.read(elasticsearch_auth_file).strip
88
59
  end
89
60
 
90
- # Check if an index exists
91
- def index?(uri)
92
- es_api(:get, "#{@url}/#{uri}").code == 200
61
+ def es_api
62
+ @es_api ||= begin
63
+ es_host = @conf['es_host'] || 'localhost'
64
+ Elasticsearch::Client.new URI "http://#{auth}@#{es_host}:9200"
65
+ end
93
66
  end
94
67
 
95
- # Check if a backup repo exists
96
- def repo?
97
- es_api(:get, @monthly_snap_url).code == 200
68
+ def master?
69
+ es_api.nodes.info['nodes'][es_api.cluster.state['master_node']]['node'] == node_name
98
70
  end
99
71
 
100
- def notify(e)
101
- pagerduty.trigger(
102
- 'prod Elasticsearch S3 failed',
103
- client: conf['node_name'],
104
- details: "#{e.message}\n\n#{e.backtrace}"
105
- )
72
+ def pseudo_random_string
73
+ 'a' + rand(10**100).to_s
106
74
  end
107
75
 
108
76
  def insert_test_data
109
- # Generate some test data using Faker
110
- #
111
- # Uses Bitcoin addresses for their random, hash-like nature
112
- # Creates the `backup_test` index if necessary
113
- # Updates the set of `dummy` documents in the `backup_test` on every run
114
-
115
- logger.info 'Generating test data using Faker…'
116
- conf['test_size'].times do |i|
117
- es_api(
118
- :put,
119
- "#{@url}/#{@backup_index}/dummy/#{i}",
120
- parameters: {
121
- test_value: Faker::Bitcoin.address
122
- }.to_json
77
+ logger.info 'Generating test data using math…'
78
+ test_size.times do |i|
79
+ es_api.create(
80
+ index: @backup_test_index,
81
+ type: 'dummy',
82
+ id: i,
83
+ body: { test_value: pseudo_random_string }
123
84
  )
124
85
  end
125
86
  end
126
87
 
127
- def delete_index(index)
128
- logger.info "Deleting index: #{index}"
129
- es_api(
130
- :delete,
131
- "#{@url}/#{index}",
132
- auth: { user: auth.first, password: auth.last }
133
- )
134
- end
135
-
136
88
  def create_repo
137
- new_repo_params = conf['new_repo_params'].merge(
89
+ new_repo_params = new_repo_params.merge(
138
90
  type: 's3',
139
91
  settings: {
140
- base_path: "/elasticsearch/#{cluster_name}/#{conf['env']}/#{@monthly}",
92
+ base_path: "/elasticsearch/#{cluster_name}/#{conf['env']}/#{@backup_repo}",
141
93
  server_side_encryption: true
142
94
  }
143
95
  )
144
96
 
145
97
  logger.info 'Creating a new monthly ES backup repo…'
146
- es_api(
147
- :put,
148
- @monthly_snap_url,
149
- parameters: new_repo_params.to_json
150
- )
98
+ es_api.snapshot.create_repository repository: @backup_repo,
99
+ body: new_repo_params
151
100
  end
152
101
 
153
102
  def valid_date?(date)
103
+ # rubocop:disable Style/RescueModifier
154
104
  Time.strptime(date, '%m-%Y') rescue false
105
+ # rubocop:enable Style/RescueModifier
155
106
  end
156
107
 
157
108
  def dated_repos
158
- es_api(:get, "#{@url}/_snapshot").body.keys.select { |r| valid_date? r }
109
+ es_api.snapshot.get_repository.keys.select { |r| valid_date? r }
159
110
  end
160
111
 
161
112
  def remove_expired_backups
@@ -163,106 +114,42 @@ module EverTools
163
114
  logger.info "Removing backups older than #{3.months.ago.strftime '%m-%Y'}"
164
115
  dated_repos.select { |b| Time.strptime(b, '%m-%Y') < 3.months.ago }.each do |repo|
165
116
  logger.info "Removing #{repo}"
166
- es_api :delete, "#{@url}/_snapshot/#{repo}"
117
+ es_api.snapshot.delete_repository repository: repo
167
118
  end
168
119
  end
169
120
 
170
- # rubocop:disable Metrics/AbcSize
171
- def snapshot
172
- backup_uri = "#{@monthly_snap_url}/#{@datetime}"
173
- status_req = es_api :get, "#{backup_uri}/_status"
174
- if status_req.code == 404 ||
175
- status_req.body['snapshots'].empty?
176
- fail "Could not find the backup I just created (#{backup_uri})"
177
- end
178
- snapshot = status_req.body['snapshots'].first
179
- logger.info "Backup state: #{snapshot['state']} " \
180
- "(finished shard #{snapshot['shards_stats']['done']} of " \
181
- "#{snapshot['shards_stats']['total']})"
182
- end
183
- # rubocop:enable Metrics/AbcSize
184
-
185
- def backup_complete?
186
- case snapshot['state']
187
- when 'IN_PROGRESS', 'STARTED'
188
- return false
189
- when 'SUCCESS'
190
- return true
191
- end
192
- fail "Backup failed!\n" \
193
- "State: #{snapshot['state']}\n" \
194
- "Response Body: #{status_req.body}"
195
- end
196
-
197
- def verify_create!
198
- # Check the status of the backup for up to an hour
199
- backup_start_time = Time.now.utc
200
- until Time.now.utc > (backup_start_time + @backup_timeout)
201
- return true if backup_complete?
202
- # Don't hammer the status endpoint
203
- sleep 15
204
- end
205
-
206
- fail 'Create timed out'
207
- end
208
-
209
- def make_new_backup
121
+ def create_snapshot
210
122
  # Make a backup (full on new month, incremental otherwise)
211
- logger.info "Starting a new backup (#{@monthly_snap_url}/#{@datetime})…"
212
- es_api :put, "#{@monthly_snap_url}/#{@datetime}"
213
-
214
- # Give the new backup time to show up
215
- sleep 5
216
-
217
- verify_create!
123
+ logger.info "Starting a new backup (#{backup_repo}/#{@snapshot_label})…"
124
+ r = es_api.snapshot.create repository: @backup_repo,
125
+ snapshot: @snapshot_label,
126
+ wait_for_completion: true
127
+ logger.info 'Snapshot complete. Time: ' \
128
+ "#{r['snapshot']['duration_in_millis']}. " \
129
+ "Results: #{r['snapshot']['shards'].inspect}"
130
+ fail "Snapshot failed! #{r.inspect}" if r['snapshot']['failures'].any?
218
131
  end
219
132
 
220
133
  def restore_test_index
221
134
  # Restore just the backup_test index to a new index
222
135
  logger.info 'Restoring the backup_test index…'
223
- es_api(
224
- :post,
225
- "#{@monthly_snap_url}/#{@datetime}/_restore",
226
- parameters: {
227
- indices: @backup_index,
228
- rename_pattern: @backup_index,
229
- rename_replacement: @restore_index
230
- }.to_json
231
- )
232
- verify_restored_index!
233
- end
234
-
235
- def index_shards(index)
236
- r = es_api(:get, "#{@url}/#{index}/_status")
237
- fail "Index #{index} not found" if r.code == 404
238
- r.body.fetch('indices', {}).fetch(index, {})['shards']
239
- end
240
-
241
- def index_online?(index)
242
- shards = index_shards(index)
243
- shards && shards.select { |_k, v| v.find { |n| n['state'] != 'STARTED' } }.empty?
136
+ es_api.snapshot.restore repository: @backup_repo,
137
+ snapshot: @snapshot_label,
138
+ wait_for_completion: true,
139
+ body: {
140
+ indices: @backup_test_index,
141
+ rename_pattern: @backup_test_index,
142
+ rename_replacement: @restore_test_index
143
+ }
244
144
  end
245
145
 
246
146
  def index_item(index, id)
247
- es_api(:get, "#{@url}/#{index}/dummy/#{id}").body
248
- end
249
-
250
- def wait_for_index(index)
251
- until index_online? index
252
- logger.info 'Waiting for restored index to be available…'
253
- sleep 1
254
- end
147
+ es_api.get(index: index, type: 'dummy', id: id)['_source']['test_value']
255
148
  end
256
149
 
257
150
  def compare_index_item!(i)
258
- # Loop until the restored version is available
259
- until index_item(@restore_index, i)['found']
260
- logger.info 'Waiting for restored index to be available…'
261
- sleep 1
262
- end
263
-
264
- backup_item = index_item(@backup_index, i)['_source']['test_value']
265
- restore_item = index_item(@restore_index, i)['_source']['test_value']
151
+ backup_item = index_item(@backup_test_index, i)
152
+ restore_item = index_item(@restore_test_index, i)
266
153
 
267
154
  (backup_item == restore_item) ||
268
155
  fail("Item #{i} in test restore doesn’t match.\n" \
@@ -270,45 +157,37 @@ module EverTools
270
157
  "Restored: #{restore_item}")
271
158
  end
272
159
 
273
- def verify_restored_index!
274
- # Compare each doc in the original backup_test index to the restored index
275
-
276
- logger.info "Verifying the newly-restored #{@backup_index}…"
277
- wait_for_index @restore_index
278
-
279
- conf['test_size'].times { |i| compare_index_item! i }
280
-
281
- logger.info 'Successfully verified the test data!'
160
+ def delete_test_indexes
161
+ [@restore_test_index, @backup_test_index].each do |test_index|
162
+ es_api.indices.delete index: test_index
163
+ end
282
164
  end
283
165
 
284
166
  # rubocop:disable Metrics/AbcSize, Lint/RescueException
285
167
  def run
286
168
  unless master?
287
- logger.info 'This node is not the currently elected master, aborting ' \
288
- 'backup.'
169
+ logger.info 'This node is not the currently elected master. Exiting.'
289
170
  exit 0
290
171
  end
291
172
 
292
- configure_sentry!
293
-
294
- # Remove the previous `restore_test` index, to avoid a race condition
295
- # with checking the restored copy of this index
296
- delete_index @restore_index if index? @restore_index
297
-
298
173
  insert_test_data
299
174
 
300
175
  # Create a new repo if none exists (typically at beginning of month)
301
- create_repo unless repo?
302
- make_new_backup
176
+ create_repo unless es_api.snapshot.get_repository[backup_repo]
177
+ create_snapshot
303
178
  restore_test_index
179
+ # Compare each doc in the original backup_test index to the restored index
180
+ logger.info "Verifying the newly-restored #{@backup_test_index}…"
181
+ test_size.times { |i| compare_index_item! i }
182
+ logger.info 'Successfully verified the test data!'
183
+ delete_test_indexes
304
184
 
305
185
  remove_expired_backups
306
186
  logger.info 'Finished'
307
187
  rescue Exception => e # Need to rescue "Exception" so that Sentry gets it
308
- notify e if conf['env'] == 'prod'
188
+ notify e
309
189
  logger.fatal e.message
310
190
  logger.fatal e.backtrace
311
- Raven.capture_exception(e)
312
191
  raise e
313
192
  end
314
193
  # rubocop:enable Metrics/AbcSize, Lint/RescueException
@@ -1,5 +1,5 @@
1
1
  module EverTools
2
2
  class ElasticsearchS3Backup
3
- VERSION = "1.0.1"
3
+ VERSION = '1.1.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticsearch_s3_backup
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Herot
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-08-24 00:00:00.000000000 Z
11
+ date: 2015-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: elasticsearch
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
125
139
  description: ''
126
140
  email:
127
141
  - eric.github@herot.com