elasticsearch_s3_backup 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1a2652e8865ae1bae268503eacb635d2ce586592
4
- data.tar.gz: 11c55edfc00bdc8abb85aff511c939fe0eb4d9e6
3
+ metadata.gz: f4080ff7b9f8bac6212fe0d240806c810259e6e1
4
+ data.tar.gz: 9669329c6836926d87e08494e1fc9b7c5f596186
5
5
  SHA512:
6
- metadata.gz: e7e337aa678f99dba4ff872761039f15fa511c6d7246d955f562d52a46889e83bd0789ae15ef841419c6f2f8f424c0a757b1c829c0992683050fdf077a85f8fb
7
- data.tar.gz: c27135eab4c5871cee1d48b0ed6f12f516040bf32049d6ac29dd017db3816223f14d1c91d4286052d03b68abfca4fc04ebe4fb259ad27737441396c024975234
6
+ metadata.gz: 183937a0de6ea3ad69f945cd484e9e50f3613ece45bac331685d145aa3faf028547e17f8064f5ada11987147f7c27ff70a6b56544ac4d9411af0263183221988
7
+ data.tar.gz: f81624dffe286fc624630e2c31c439e8f8d01de7d170c3b4e0652803abe3f21410a0f72a9454f92049537469fc4fa9f70e6621869052f828d29f69d6de4c5b1e
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
@@ -23,7 +23,10 @@ Gem::Specification.new do |spec|
23
23
  'pushes.'
24
24
  end
25
25
 
26
- spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
26
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
27
+ f.match(%r{^(test|spec|features)/})
28
+ end
29
+
27
30
  spec.bindir = 'exe'
28
31
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
29
32
  spec.require_paths = ['lib']
@@ -37,4 +40,5 @@ Gem::Specification.new do |spec|
37
40
  spec.add_dependency 'faker'
38
41
  spec.add_dependency 'pagerduty'
39
42
  spec.add_dependency 'sentry-raven'
43
+ spec.add_dependency 'elasticsearch'
40
44
  end
@@ -2,160 +2,111 @@ require 'elasticsearch_s3_backup/version'
2
2
  require 'active_support/time'
3
3
  require 'unirest'
4
4
  require 'logger'
5
- require 'faker'
6
5
  require 'pagerduty'
7
6
  require 'yaml'
8
7
  require 'sentry-raven'
8
+ require 'ostruct'
9
+ require 'elasticsearch'
9
10
 
10
11
  module EverTools
11
12
  class ElasticsearchS3Backup
12
- def conf
13
- @conf ||= YAML.load_file('/etc/s3_backup.yml')
14
- end
15
-
16
- def pagerduty
17
- @pagerduty ||= Pagerduty.new conf['pagerduty_api_key']
18
- end
13
+ extend Forwardable
19
14
 
20
- def configure_sentry!
21
- Raven.configure { config.dsn = conf['sentry_dsn'] }
22
- end
15
+ def_delegators :@conf,
16
+ :pagerduty_api_key,
17
+ :test_size,
18
+ :new_repo_params,
19
+ :sentry_dsn,
20
+ :node_name,
21
+ :elasticsearch_auth_file,
22
+ :cluster_name
23
23
 
24
- def auth
25
- @auth ||= File.read(conf['elasticsearch_auth_file']).strip.split ':'
26
- end
24
+ attr_reader :conf, :backup_repo
27
25
 
28
26
  def initialize
29
- Unirest.default_header 'Accept', 'application/json'
30
- Unirest.default_header 'Content-Type', 'application/json'
31
- Unirest.timeout 30
32
-
33
- @url = 'http://localhost:9200'
27
+ @conf = OpenStruct.new(YAML.load_file('/etc/s3_backup.yml'))
28
+ Raven.configure { config.dsn = sentry_dsn } if sentry_dsn
34
29
 
35
- @backup_index = 'backup_test'
36
- @restore_index = 'restore_test'
37
-
38
- now = Time.new.utc
39
- @monthly = now.strftime '%m-%Y'
40
- @datetime = now.strftime '%m-%d_%H%M'
41
-
42
- @monthly_snap_url = "#{@url}/_snapshot/#{@monthly}"
43
-
44
- @backup_timeout = 1.hours
30
+ now = Time.new.utc
31
+ @backup_test_index = "backup_test_#{now.to_i}"
32
+ @restore_test_index = "restore_test_#{now.to_i}"
33
+ @backup_repo = now.strftime '%m-%Y'
34
+ @snapshot_label = now.strftime '%m-%d_%H%M'
45
35
  end
46
36
 
47
37
  def logger
48
- # Set up logging
49
- @logger ||= Logger.new(conf['log']).tap do |l|
50
- l.level = Logger::INFO
51
- l.progname = 's3_backup'
52
- l.formatter =
53
- proc do |severity, datetime, progname, msg|
54
- "#{datetime.utc} [#{progname}] #{severity}: #{msg}\n"
55
- end
56
- end
38
+ @logger ||= Logger.new(conf['log']).tap { |l| l.progname = 's3_backup' }
57
39
  end
58
40
 
59
- # rubocop:disable Metrics/AbcSize
60
- def es_api(method, uri, params = {})
61
- tries = 3
62
- begin
63
- r = Unirest.send(method, uri, params)
64
- case r.code
65
- when 200..299
66
- return r
67
- when 400..499
68
- logger.debug "#{method} request to #{uri} received #{r.code} (params: #{params.inspect})\n" \
69
- "Body:\n" \
70
- "#{r.body}\n"
71
- return r
72
- end
73
- # byebug
74
- fail "#{method.upcase} request to #{uri} failed (params: #{params.inspect})\n" \
75
- "Response code: #{r.code}\n" \
76
- "Body:\n" \
77
- "#{r.body}\n"
78
- rescue RuntimeError => e
79
- tries -= 1
80
- retry if e.message == 'Request Timeout' && tries > 0
81
- raise e
41
+ def pagerduty
42
+ @pagerduty ||= Pagerduty.new pagerduty_api_key
43
+ end
44
+
45
+ def notify(e)
46
+ if conf['env'] == 'prod'
47
+ pagerduty.trigger(
48
+ 'prod Elasticsearch S3 failed',
49
+ client: node_name,
50
+ details: "#{e.message}\n\n#{e.backtrace}"
51
+ )
82
52
  end
53
+
54
+ Raven.capture_exception(e) if sentry_dsn
83
55
  end
84
- # rubocop:enable Metrics/AbcSize
85
56
 
86
- def master?
87
- es_api(:get, "#{@url}/_cat/master").body[0]['node'] == conf['node_name']
57
+ def auth
58
+ File.read(elasticsearch_auth_file).strip
88
59
  end
89
60
 
90
- # Check if an index exists
91
- def index?(uri)
92
- es_api(:get, "#{@url}/#{uri}").code == 200
61
+ def es_api
62
+ @es_api ||= begin
63
+ es_host = @conf['es_host'] || 'localhost'
64
+ Elasticsearch::Client.new URI "http://#{auth}@#{es_host}:9200"
65
+ end
93
66
  end
94
67
 
95
- # Check if a backup repo exists
96
- def repo?
97
- es_api(:get, @monthly_snap_url).code == 200
68
+ def master?
69
+ es_api.nodes.info['nodes'][es_api.cluster.state['master_node']]['node'] == node_name
98
70
  end
99
71
 
100
- def notify(e)
101
- pagerduty.trigger(
102
- 'prod Elasticsearch S3 failed',
103
- client: conf['node_name'],
104
- details: "#{e.message}\n\n#{e.backtrace}"
105
- )
72
+ def pseudo_random_string
73
+ 'a' + rand(10**100).to_s
106
74
  end
107
75
 
108
76
  def insert_test_data
109
- # Generate some test data using Faker
110
- #
111
- # Uses Bitcoin addresses for their random, hash-like nature
112
- # Creates the `backup_test` index if necessary
113
- # Updates the set of `dummy` documents in the `backup_test` on every run
114
-
115
- logger.info 'Generating test data using Faker…'
116
- conf['test_size'].times do |i|
117
- es_api(
118
- :put,
119
- "#{@url}/#{@backup_index}/dummy/#{i}",
120
- parameters: {
121
- test_value: Faker::Bitcoin.address
122
- }.to_json
77
+ logger.info 'Generating test data using math…'
78
+ test_size.times do |i|
79
+ es_api.create(
80
+ index: @backup_test_index,
81
+ type: 'dummy',
82
+ id: i,
83
+ body: { test_value: pseudo_random_string }
123
84
  )
124
85
  end
125
86
  end
126
87
 
127
- def delete_index(index)
128
- logger.info "Deleting index: #{index}"
129
- es_api(
130
- :delete,
131
- "#{@url}/#{index}",
132
- auth: { user: auth.first, password: auth.last }
133
- )
134
- end
135
-
136
88
  def create_repo
137
- new_repo_params = conf['new_repo_params'].merge(
89
+ new_repo_params = new_repo_params.merge(
138
90
  type: 's3',
139
91
  settings: {
140
- base_path: "/elasticsearch/#{cluster_name}/#{conf['env']}/#{@monthly}",
92
+ base_path: "/elasticsearch/#{cluster_name}/#{conf['env']}/#{@backup_repo}",
141
93
  server_side_encryption: true
142
94
  }
143
95
  )
144
96
 
145
97
  logger.info 'Creating a new monthly ES backup repo…'
146
- es_api(
147
- :put,
148
- @monthly_snap_url,
149
- parameters: new_repo_params.to_json
150
- )
98
+ es_api.snapshot.create_repository repository: @backup_repo,
99
+ body: new_repo_params
151
100
  end
152
101
 
153
102
  def valid_date?(date)
103
+ # rubocop:disable Style/RescueModifier
154
104
  Time.strptime(date, '%m-%Y') rescue false
105
+ # rubocop:enable Style/RescueModifier
155
106
  end
156
107
 
157
108
  def dated_repos
158
- es_api(:get, "#{@url}/_snapshot").body.keys.select { |r| valid_date? r }
109
+ es_api.snapshot.get_repository.keys.select { |r| valid_date? r }
159
110
  end
160
111
 
161
112
  def remove_expired_backups
@@ -163,106 +114,42 @@ module EverTools
163
114
  logger.info "Removing backups older than #{3.months.ago.strftime '%m-%Y'}"
164
115
  dated_repos.select { |b| Time.strptime(b, '%m-%Y') < 3.months.ago }.each do |repo|
165
116
  logger.info "Removing #{repo}"
166
- es_api :delete, "#{@url}/_snapshot/#{repo}"
117
+ es_api.snapshot.delete_repository repository: repo
167
118
  end
168
119
  end
169
120
 
170
- # rubocop:disable Metrics/AbcSize
171
- def snapshot
172
- backup_uri = "#{@monthly_snap_url}/#{@datetime}"
173
- status_req = es_api :get, "#{backup_uri}/_status"
174
- if status_req.code == 404 ||
175
- status_req.body['snapshots'].empty?
176
- fail "Could not find the backup I just created (#{backup_uri})"
177
- end
178
- snapshot = status_req.body['snapshots'].first
179
- logger.info "Backup state: #{snapshot['state']} " \
180
- "(finished shard #{snapshot['shards_stats']['done']} of " \
181
- "#{snapshot['shards_stats']['total']})"
182
- end
183
- # rubocop:enable Metrics/AbcSize
184
-
185
- def backup_complete?
186
- case snapshot['state']
187
- when 'IN_PROGRESS', 'STARTED'
188
- return false
189
- when 'SUCCESS'
190
- return true
191
- end
192
- fail "Backup failed!\n" \
193
- "State: #{snapshot['state']}\n" \
194
- "Response Body: #{status_req.body}"
195
- end
196
-
197
- def verify_create!
198
- # Check the status of the backup for up to an hour
199
- backup_start_time = Time.now.utc
200
- until Time.now.utc > (backup_start_time + @backup_timeout)
201
- return true if backup_complete?
202
- # Don't hammer the status endpoint
203
- sleep 15
204
- end
205
-
206
- fail 'Create timed out'
207
- end
208
-
209
- def make_new_backup
121
+ def create_snapshot
210
122
  # Make a backup (full on new month, incremental otherwise)
211
- logger.info "Starting a new backup (#{@monthly_snap_url}/#{@datetime})…"
212
- es_api :put, "#{@monthly_snap_url}/#{@datetime}"
213
-
214
- # Give the new backup time to show up
215
- sleep 5
216
-
217
- verify_create!
123
+ logger.info "Starting a new backup (#{backup_repo}/#{@snapshot_label})…"
124
+ r = es_api.snapshot.create repository: @backup_repo,
125
+ snapshot: @snapshot_label,
126
+ wait_for_completion: true
127
+ logger.info 'Snapshot complete. Time: ' \
128
+ "#{r['snapshot']['duration_in_millis']}. " \
129
+ "Results: #{r['snapshot']['shards'].inspect}"
130
+ fail "Snapshot failed! #{r.inspect}" if r['snapshot']['failures'].any?
218
131
  end
219
132
 
220
133
  def restore_test_index
221
134
  # Restore just the backup_test index to a new index
222
135
  logger.info 'Restoring the backup_test index…'
223
- es_api(
224
- :post,
225
- "#{@monthly_snap_url}/#{@datetime}/_restore",
226
- parameters: {
227
- indices: @backup_index,
228
- rename_pattern: @backup_index,
229
- rename_replacement: @restore_index
230
- }.to_json
231
- )
232
- verify_restored_index!
233
- end
234
-
235
- def index_shards(index)
236
- r = es_api(:get, "#{@url}/#{index}/_status")
237
- fail "Index #{index} not found" if r.code == 404
238
- r.body.fetch('indices', {}).fetch(index, {})['shards']
239
- end
240
-
241
- def index_online?(index)
242
- shards = index_shards(index)
243
- shards && shards.select { |_k, v| v.find { |n| n['state'] != 'STARTED' } }.empty?
136
+ es_api.snapshot.restore repository: @backup_repo,
137
+ snapshot: @snapshot_label,
138
+ wait_for_completion: true,
139
+ body: {
140
+ indices: @backup_test_index,
141
+ rename_pattern: @backup_test_index,
142
+ rename_replacement: @restore_test_index
143
+ }
244
144
  end
245
145
 
246
146
  def index_item(index, id)
247
- es_api(:get, "#{@url}/#{index}/dummy/#{id}").body
248
- end
249
-
250
- def wait_for_index(index)
251
- until index_online? index
252
- logger.info 'Waiting for restored index to be available…'
253
- sleep 1
254
- end
147
+ es_api.get(index: index, type: 'dummy', id: id)['_source']['test_value']
255
148
  end
256
149
 
257
150
  def compare_index_item!(i)
258
- # Loop until the restored version is available
259
- until index_item(@restore_index, i)['found']
260
- logger.info 'Waiting for restored index to be available…'
261
- sleep 1
262
- end
263
-
264
- backup_item = index_item(@backup_index, i)['_source']['test_value']
265
- restore_item = index_item(@restore_index, i)['_source']['test_value']
151
+ backup_item = index_item(@backup_test_index, i)
152
+ restore_item = index_item(@restore_test_index, i)
266
153
 
267
154
  (backup_item == restore_item) ||
268
155
  fail("Item #{i} in test restore doesn’t match.\n" \
@@ -270,45 +157,37 @@ module EverTools
270
157
  "Restored: #{restore_item}")
271
158
  end
272
159
 
273
- def verify_restored_index!
274
- # Compare each doc in the original backup_test index to the restored index
275
-
276
- logger.info "Verifying the newly-restored #{@backup_index}…"
277
- wait_for_index @restore_index
278
-
279
- conf['test_size'].times { |i| compare_index_item! i }
280
-
281
- logger.info 'Successfully verified the test data!'
160
+ def delete_test_indexes
161
+ [@restore_test_index, @backup_test_index].each do |test_index|
162
+ es_api.indices.delete index: test_index
163
+ end
282
164
  end
283
165
 
284
166
  # rubocop:disable Metrics/AbcSize, Lint/RescueException
285
167
  def run
286
168
  unless master?
287
- logger.info 'This node is not the currently elected master, aborting ' \
288
- 'backup.'
169
+ logger.info 'This node is not the currently elected master. Exiting.'
289
170
  exit 0
290
171
  end
291
172
 
292
- configure_sentry!
293
-
294
- # Remove the previous `restore_test` index, to avoid a race condition
295
- # with checking the restored copy of this index
296
- delete_index @restore_index if index? @restore_index
297
-
298
173
  insert_test_data
299
174
 
300
175
  # Create a new repo if none exists (typically at beginning of month)
301
- create_repo unless repo?
302
- make_new_backup
176
+ create_repo unless es_api.snapshot.get_repository[backup_repo]
177
+ create_snapshot
303
178
  restore_test_index
179
+ # Compare each doc in the original backup_test index to the restored index
180
+ logger.info "Verifying the newly-restored #{@backup_test_index}…"
181
+ test_size.times { |i| compare_index_item! i }
182
+ logger.info 'Successfully verified the test data!'
183
+ delete_test_indexes
304
184
 
305
185
  remove_expired_backups
306
186
  logger.info 'Finished'
307
187
  rescue Exception => e # Need to rescue "Exception" so that Sentry gets it
308
- notify e if conf['env'] == 'prod'
188
+ notify e
309
189
  logger.fatal e.message
310
190
  logger.fatal e.backtrace
311
- Raven.capture_exception(e)
312
191
  raise e
313
192
  end
314
193
  # rubocop:enable Metrics/AbcSize, Lint/RescueException
@@ -1,5 +1,5 @@
1
1
  module EverTools
2
2
  class ElasticsearchS3Backup
3
- VERSION = "1.0.1"
3
+ VERSION = '1.1.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticsearch_s3_backup
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Herot
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-08-24 00:00:00.000000000 Z
11
+ date: 2015-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: elasticsearch
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
125
139
  description: ''
126
140
  email:
127
141
  - eric.github@herot.com