data-exporter 1.3.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 29fcc31fa3466058bc785fe88e2e1acadec677d7
4
+ data.tar.gz: 8a41c38d5c0383ce63ae6290f88406e82f32c279
5
+ SHA512:
6
+ metadata.gz: 9ac800e453a9623a806dc87b87d7a8878d5ab78c59b36a7c9d84a46524c3b2567713fce70e3e05af5039bb205711c5f9ed2043f401f3e98ca3a6c7260601b892
7
+ data.tar.gz: d490349507e86a2f597e9ab0e58ed1fccb6d402516d3566be748b3abfd53bc583f244e73a60674b00cb94575442e3e7968f4b99979e0187ecf1982561e0e17ba
@@ -0,0 +1,40 @@
1
+ DataExporter
2
+ ------------
3
+ Used to export and import MySQL databases to S3.
4
+
5
+ To export a MySQL database:
6
+
7
+ 1. Add gem to Gemfile
8
+ 2. Create a YAML or ERB configuration that `data-exporter` can read:
9
+ ```yaml
10
+ export_dir: '/tmp'
11
+ backup_dir: '/backups'
12
+ backup_key: 'config/backup_key' # openssl encryption key file
13
+ mysql:
14
+ adapter: 'mysql2'
15
+ host: 'localhost'
16
+ database: 'centurion_development'
17
+ username: 'root'
18
+ password: ''
19
+ s3:
20
+ access_key_id: 'ACCESS_KEY_ID'
21
+ secret_access_key: 'SECRET_ACCESS_KEY'
22
+ bucket_name: 'socialcast_backups'
23
+ prefix: 'centurion_development'
24
+ ```
25
+
26
+ 3. Execute:
27
+ ```shell
28
+ bundle exec data-exporter export -f etc/data_exporter.yml --csv
29
+ ```
30
+
31
+
32
+ To import a MySQL database as a directory of CSV files:
33
+ ```shell
34
+ bundle exec data-exporter unpack -f etc/data_exporter.yml --csv
35
+ ```
36
+
37
+ To import a MySQL database backup via --sftp:
38
+ ```shell
39
+ bundle exec data-exporter unpack -f etc/data_exporter.yml --csv --sftp
40
+ ```
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env rake
2
+
3
+ begin
4
+ require 'bundler/setup'
5
+ rescue LoadError
6
+ puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
7
+ end
8
+
9
+ Bundler::GemHelper.install_tasks
10
+
11
+ require 'rspec/core/rake_task'
12
+
13
+ RSpec::Core::RakeTask.new(:spec)
14
+
15
+ task :default => :spec
@@ -0,0 +1,4 @@
#!/usr/bin/env ruby
# CLI entry point: put this checkout's lib/ on the load path so the
# executable works from source without the gem being installed.
lib_dir = File.expand_path('../../lib/', __FILE__)
$: << lib_dir
require 'data_exporter'
DataExporter::CLI.start
@@ -0,0 +1,21 @@
module DataExporter
  require 'data_exporter/configuration'
  require 'data_exporter/archive'
  require 'data_exporter/actions'
  require 'data_exporter/cli'

  class << self
    # Lazily-built, process-wide configuration instance.
    def configuration
      @configuration ||= DataExporter::Configuration.new
    end
    alias :config :configuration

    # Yields the global configuration for block-style setup.
    # (No &block capture: `yield` alone avoids allocating a Proc.)
    def configure
      yield configuration
    end

    # Establish the ActiveRecord connection described by the configured
    # database settings and return the connection handle/pool.
    def database_connection
      ActiveRecord::Base.establish_connection(configuration.database)
    end
  end
end
@@ -0,0 +1,321 @@
require 'active_record'
require 'csv'
require 'open3'
require 'net/sftp'
require 'aws/s3'
require 'ostruct'
require 'fileutils' # used directly below (mkdir_p, rmtree, move, rm)
require 'tmpdir'    # Dir.mktmpdir

module DataExporter
  # Tables excluded from CSV exports.
  IGNORED_TABLES = %w(schema_migrations checksums)

  # Helpers for exporting/importing encrypted MySQL backups to and from
  # S3 or SFTP via openssl/gzip/tar shell pipelines.
  module Actions
    # --- argv builders for Open3.pipeline_start -------------------------

    # openssl command that decrypts +in_file_name+ ('-' = stdin) with the
    # configured backup key, writing plaintext to stdout.
    def decrypt(in_file_name = '-')
      ['openssl', 'enc', '-d', '-aes-256-cbc', '-salt', '-pass', "file:#{config.backup_key}", '-in', in_file_name]
    end

    # gunzip command ('-' = stdin).
    def expand(file_name = '-')
      ['gunzip', file_name]
    end

    # tar extraction command ('-' = stdin).
    def unarchive(file_name = '-')
      ['tar', '-xf', file_name]
    end

    # gzip command writing compressed output to stdout.
    def compress(file_name = '-')
      ['gzip', '--to-stdout', file_name]
    end

    # openssl command that encrypts stdin with the key file +backup_key+.
    def encrypt(backup_key)
      ['openssl', 'enc', '-aes-256-cbc', '-salt', '-pass', "file:#{backup_key}"]
    end

    # Unpack an encrypted backup into +unpack_dir+, dispatching on the file
    # extension. Unrecognized extensions are ignored (returns nil).
    def unpack(encrypted_file, unpack_dir)
      case encrypted_file
      when /\.tar\.gz\.enc\z/
        unpack_encrypted_archive(encrypted_file, unpack_dir)
      when /\.sql\.gz\.enc\z/
        unpack_encrypted_file(encrypted_file, unpack_dir)
      end
    end

    # Create +encrypted_file+: a CSV tar archive (.tar.gz.enc — from
    # +archive_dir+ if given, otherwise a live MySQL CSV export) or a
    # mysqldump stream (.sql.gz.enc). Unrecognized extensions are ignored.
    def export(backup_key, encrypted_file, archive_dir = nil)
      case encrypted_file
      when /\.tar\.gz\.enc\z/
        export_encrypted_archive(backup_key, encrypted_file, archive_dir)
      when /\.sql\.gz\.enc\z/
        export_encrypted_file(backup_key, encrypted_file)
      end
    end

    # Shorthand for the global configuration.
    def config
      DataExporter.config
    end

    # Most recent remote backup matching prefix/suffix; SFTP or S3 depending
    # on configuration. Returns an OpenStruct (see helpers below) or nil.
    def find_last_backup(prefix, suffix, backup_dir)
      if config.sftp_enabled?
        find_last_sftp_backup(prefix, suffix, backup_dir)
      else
        find_last_s3_backup(prefix, suffix, backup_dir)
      end
    end

    # Newest SFTP entry matching prefix*suffix in +backup_dir+.
    # Returns OpenStruct(:name, :mtime, :size) or nil; errors are logged
    # and swallowed (best-effort lookup).
    def find_last_sftp_backup(prefix, suffix, backup_dir = '/')
      backup_entry = nil
      sftp.dir.glob(backup_dir, prefix + '*' + suffix) do |entry|
        backup_entry ||= entry
        backup_entry = entry if entry.attributes.mtime > backup_entry.attributes.mtime
      end
      OpenStruct.new(:name => File.join(backup_dir, backup_entry.name), :mtime => backup_entry.attributes.mtime, :size => backup_entry.attributes.size) if backup_entry
    rescue => e
      log e.to_s
      nil
    end

    # Convert a glob fragment into regexp source: '.' escaped, '*' -> '.*'.
    def glob_escape(glob)
      glob.gsub('.', '\\.').gsub('*', '.*')
    end

    # Newest S3 object under backup_dir/prefix whose key ends with +suffix+.
    # Returns OpenStruct(:name, :mtime, :size, :io) or nil; errors are
    # logged and swallowed.
    def find_last_s3_backup(prefix, suffix, backup_dir = nil)
      s3_backup = nil
      s3_bucket = config.s3[:bucket]
      s3_prefix = backup_dir ? File.join(backup_dir, prefix) : prefix
      s3.buckets[s3_bucket].objects.with_prefix(s3_prefix).each do |s3_object|
        next unless s3_object.key =~ /#{glob_escape(suffix)}\Z/
        s3_backup = s3_object
      end
      OpenStruct.new(:name => s3_backup.key, :mtime => s3_backup.last_modified.to_i, :size => s3_backup.content_length, :io => s3_backup) if s3_backup
    rescue => e
      log e.to_s
      nil
    end

    private

    SFTP_MAX_TIMEOUT = 30

    # Run a shell pipeline, logging its stderr and each command's exit
    # status. Returns the Process::Status array for the pipeline stages.
    def pipeline_with_log(*args)
      opts = Hash === args.last ? args.pop.dup : {}
      err_r, err_w = IO.pipe
      threads = []
      process = 0
      Open3.pipeline_start(*args, opts.merge(:err => err_w)) do |ts|
        ts.each { |t| wait_unless_done(t.value) }
        err_w.close
        log(err_r.read)
        ts.each { |x| log("executed '#{args[process].join(' ')}' - #{x.value}"); process += 1 }
        threads += ts
      end
      threads.map(&:value)
    end

    # Best-effort wait for a pipeline stage; the child may already have been
    # reaped, in which case Process.wait raises and is deliberately ignored.
    def wait_unless_done(process)
      Process.wait(process.pid) if process && process.try(:pid)
    rescue
    end

    # Decrypt + gunzip a .sql.gz.enc dump into unpack_dir/<name>.sql.
    def unpack_encrypted_file(encrypted_file, unpack_dir)
      FileUtils.mkdir_p(unpack_dir)
      out_file_name = File.join(unpack_dir, File.basename(encrypted_file).chomp('.gz.enc'))
      status = pipeline_with_log(decrypt(encrypted_file), expand, :out => out_file_name)
      raise SystemExit, "Problem unpacking #{encrypted_file}" unless status.all? { |x| x.success? }
    end

    # Decrypt + gunzip + untar a .tar.gz.enc archive into a temp dir, then
    # atomically replace +unpack_dir+ with the extracted directory.
    def unpack_encrypted_archive(encrypted_archive, unpack_dir)
      tmp_unpack_dir = Dir.mktmpdir('data_exporter')

      status = Dir.chdir(tmp_unpack_dir) do
        pipeline_with_log(decrypt(encrypted_archive), expand, unarchive)
      end
      raise SystemExit, "Problem unpacking #{encrypted_archive}" unless status.all? { |x| x.success? }

      archive_unpack_dir = File.join(tmp_unpack_dir, config.archive_base_directory, '/')
      FileUtils.rmtree(unpack_dir)
      FileUtils.move(archive_unpack_dir, unpack_dir)
    end

    # Build a local CSV tar archive, then gzip+encrypt it to +encrypted_archive+.
    # The intermediate plaintext archive is removed afterwards.
    def export_encrypted_archive(backup_key, encrypted_archive, archive_dir = nil)
      local_archive_name = File.join(config.export_dir, File.basename(encrypted_archive).chomp('.gz.enc'))
      archive_dir ? archive_dir_csv_export(local_archive_name, archive_dir) : mysql_csv_export(local_archive_name)
      pipeline_with_log(compress(local_archive_name), encrypt(backup_key), :out => encrypted_archive)
      log "removing #{local_archive_name}"
      FileUtils.rm(local_archive_name)
    end

    # mysqldump | gzip | openssl -> encrypted_file.
    def export_encrypted_file(backup_key, encrypted_file)
      pipeline_with_log(mysqldump_export, compress, encrypt(backup_key), :out => encrypted_file)
    end

    # Fetch a remote backup into +download_dir+; returns the local path.
    def download(remote_file_info, download_dir)
      if config.sftp_enabled?
        download_via_sftp(remote_file_info, download_dir)
      else
        download_via_s3(remote_file_info, download_dir)
      end
    end

    def download_via_sftp(remote_file_info, local_dir)
      local_file = File.join(local_dir, File.basename(remote_file_info.name))
      sftp.download!(remote_file_info.name, local_file) do |event, downloader, *args|
        case event
        when :open then
          # args[0] : file metadata
          log "downloading #{args[0].remote} -> #{args[0].local} (#{args[0].size} bytes)"
        when :close then
          # args[0] : file metadata
          log "finished #{args[0].remote}"
        end
      end
      local_file
    end

    def download_via_s3(remote_file_info, local_dir)
      local_file = File.join(local_dir, File.basename(remote_file_info.name))
      log "downloading #{remote_file_info.name} to #{local_file}"
      # 'wb': the payload is an encrypted archive — binary, not text.
      File.open(local_file, 'wb') do |file|
        remote_file_info.io.read do |chunk|
          file.write(chunk)
        end
      end
      local_file
    end

    # Push a local backup to the configured remote (SFTP or S3).
    def upload(local_file, remote_dir)
      if config.sftp_enabled?
        upload_via_sftp(local_file, remote_dir)
      else
        upload_via_s3(local_file, remote_dir)
      end
    end

    def upload_via_sftp(local_file, backup_dir = nil)
      remote_file = backup_dir ? File.join(backup_dir, File.basename(local_file)) : File.basename(local_file)
      # Only attempt to create the remote directory when one was requested;
      # mkdir(nil) would raise before the upload started.
      sftp.mkdir(backup_dir) if backup_dir
      sftp.upload!(local_file, remote_file) do |event, uploader, *args|
        case event
        when :open then
          # args[0] : file metadata
          log "uploading #{args[0].local} -> #{args[0].remote} (#{args[0].size} bytes)"
        when :close then
          # args[0] : file metadata
          log "finished #{args[0].remote}"
        end
      end
    end

    def upload_via_s3(local_file, backup_dir = nil)
      s3_bucket = config.s3[:bucket]
      s3_file = backup_dir ? File.join(backup_dir, File.basename(local_file)) : File.basename(local_file)
      log "uploading #{local_file} to s3://#{s3_bucket}/#{s3_file}"
      # File.open in binary mode with a block (Kernel#open left the handle
      # unclosed and read in text mode).
      File.open(local_file, 'rb') do |io|
        s3.buckets[s3_bucket].objects[s3_file].write(io)
      end
    end

    # Tar up every file in +archive_dir+ under the configured base directory.
    def archive_dir_csv_export(archive_name, archive_dir)
      log "creating #{archive_name}"
      DataExporter::Archive.open(archive_name) do |archive|
        Dir.glob(File.join(archive_dir, '*')).each do |file_name|
          archive_file_name = File.join(config.archive_base_directory, File.basename(file_name))
          log "appending #{archive_file_name}"
          archive.append(archive_file_name, file_name)
        end
      end
    end

    # Export every table to paged CSV files and tar them up, deleting each
    # CSV page once archived.
    def mysql_csv_export(archive_name)
      log "creating #{archive_name}"
      DataExporter::Archive.open(archive_name) do |archive|
        mysql_to_csv do |file_name|
          archive_file_name = File.join(config.archive_base_directory, File.basename(file_name))
          log "appending #{archive_file_name}"
          archive.append(archive_file_name, file_name)
          FileUtils.rm(file_name)
        end
      end
    end

    # Stream every non-ignored table to CSV files of at most +block_size+
    # rows (keyset-paginated on `id` when present), yielding each file path.
    # Columns listed as PII in the config are omitted.
    # NOTE database lock is necessary for consistency
    def mysql_to_csv
      connection_pool = DataExporter::database_connection
      connection_pool.connection.tables.each do |table|
        next if IGNORED_TABLES.include?(table)
        fields = connection_pool.connection.columns(table).map(&:name)
        total = 0
        connection_pool.connection.select_rows("SELECT COUNT(*) AS count FROM #{table};").each { |result| total = result[0] }
        selected_fields = fields - config.pii_fields(table)
        page = 1
        block_size = 500_000
        total_number_of_pages = (total.to_f / block_size.to_f).ceil
        last_id = 0
        quoted_selected_fields = selected_fields.map { |field| connection_pool.connection.quote_column_name(field) }
        while page <= total_number_of_pages
          csv_file = "#{File.join(config.export_dir, "#{table}_#{page}")}.csv"
          CSV.open(csv_file, "w") do |csv|
            csv << selected_fields
            results = connection_pool.connection.select_rows("SELECT #{quoted_selected_fields.join(',')} FROM #{table} #{"WHERE id > #{last_id} ORDER BY id ASC LIMIT #{block_size}" if fields.include?('id')};")
            if results
              results.each do |table_values|
                begin
                  csv << table_values
                rescue => e
                  # Skip rows CSV cannot serialize; the second duplicate
                  # `rescue => g` clause in the original was unreachable
                  # (same StandardError class) and has been removed.
                  STDERR.puts "skipping #{table}.id = #{table_values[0]} - #{e.to_s}"
                end
              end
              last_id = results.last[0] if fields.include?('id')
            end
          end
          yield csv_file
          page += 1
        end
      end
    ensure
      # Guard: establish_connection itself may have raised, leaving
      # connection_pool nil — don't mask the original error.
      connection_pool.disconnect! if connection_pool
    end

    # mysqldump argv built from configuration; nil options are compacted out.
    def mysqldump_export
      [
        config.mysqldump_path,
        *config.mysqldump_options,
        config.database[:host] ? "--host=#{config.database[:host]}" : nil,
        config.database[:port] ? "--port=#{config.database[:port]}" : nil,
        "--user=#{config.database[:username]}",
        config.database[:password] ? "--password=#{config.database[:password]}" : nil,
        config.database[:database]
      ].compact
    end

    # No-op hook; the CLI overrides this to surface diagnostics.
    def log(*a); end

    def https_proxy
      ENV['https_proxy'] || ENV['HTTPS_PROXY']
    end

    def s3_options
      @s3_options ||=
        {
          :access_key_id => config.s3[:access_key_id],
          :secret_access_key => config.s3[:secret_access_key],
          :proxy_uri => https_proxy,
          :use_ssl => true
        }
    end

    def s3
      @s3 ||= AWS::S3.new(s3_options)
    end

    def sftp
      @sftp ||= Net::SFTP.start(config.sftp[:host], config.sftp[:user], timeout: SFTP_MAX_TIMEOUT)
    end

    # Lazily-required redis client (only needed for the status command).
    def redis
      require 'redis'
      @redis ||= Redis.new(:host => config.redis[:host], :port => config.redis[:port])
    end
  end
end
@@ -0,0 +1,25 @@
require 'archive/tar/minitar'

module DataExporter
  # Thin wrapper around Archive::Tar::Minitar::Writer for building tar
  # archives of exported files.
  class Archive
    class << self
      # Create/truncate +archive_filename+ and yield an Archive bound to a
      # tar writer; file and writer are closed when the block returns.
      # 'wb': tar output is binary — text mode would corrupt it on
      # platforms with newline translation.
      def open(archive_filename, &block)
        File.open(archive_filename, 'wb') do |file|
          ::Archive::Tar::Minitar::Writer.open(file) do |writer|
            yield self.new(writer)
          end
        end
      end
    end

    def initialize(writer)
      @writer = writer
    end

    # Append the contents of +filename+ to the archive under the entry name
    # +archive_filename+. File.binread keeps non-UTF-8/binary content intact
    # (File.read applied the default external encoding).
    def append(archive_filename, filename)
      @writer.add_file(archive_filename, :mode => 0644, :mtime => Time.now) do |archive_file|
        archive_file.write File.binread(filename)
      end
    end
  end
end
@@ -0,0 +1,154 @@
require 'thor'
require 'logger'

module DataExporter
  # Thor command-line interface: export, unpack, and report status of
  # encrypted MySQL backups stored on S3 or SFTP.
  class CLI < Thor
    include DataExporter::Actions

    namespace :data
    class_option 'debug', :aliases => '-d', :desc => 'Turn on debug logging', :default => false
    class_option 'csv', :desc => 'perform csv unpack', :default => false
    class_option 'quiet', :aliases => '-q', :desc => 'silence diagnostic information', :default => false
    class_option 'config_file', :aliases => '-f', :desc => 'configuration file to load', :required => true
    class_option 'preserve', :aliases => '-p', :desc => 'preserve unpack after download', :default => false
    class_option 'mode', :desc => 'backup mode, corresponds to section data_exporter.yml configuration file'

    desc 'unpack', 'unpack mysql database'
    method_option 'date', :desc => 'unpack export for date'
    method_option 'unpack_dir', :desc => 'directory to unpack export into'
    def unpack_task
      config.load(options.merge(:mysql_required => false))

      remote_backup = find_last_backup(config.backup_prefix, backup_suffix, config.backup_dir)

      abort no_backups_message unless remote_backup

      FileUtils.mkdir_p(config.download_dir)
      FileUtils.mkdir_p(File.dirname(config.unpack_dir))

      local_encrypted_archive = nil
      begin
        local_encrypted_archive = download(remote_backup, config.download_dir)
        log "expanding #{local_encrypted_archive}"
        unpack(local_encrypted_archive, config.unpack_dir)
      ensure
        # Guard against download having raised before assignment:
        # FileUtils.rm(nil) would raise and mask the original error.
        if local_encrypted_archive && !options[:preserve]
          log "removing #{local_encrypted_archive}"
          FileUtils.rm(local_encrypted_archive)
        end
      end
    end

    desc 'export', 'export mysql database'
    method_option 'pii_file', :desc => 'tables and columns labeled as pii'
    method_option 'archive_dir', :desc => 'local directory to export'
    method_option 'date', :desc => 'date of export'
    def export_task
      config.load(options.merge(:mysql_required => !options[:archive_dir]))
      raise ArgumentError, '--csv required for --archive-dir' if options[:archive_dir] && !config.csv_enabled?

      local_encrypted_archive_name = File.join(config.export_dir, encrypted_export_archive_name)
      log "creating #{local_encrypted_archive_name}"
      export(config.backup_key, local_encrypted_archive_name, options[:archive_dir])

      begin
        upload(local_encrypted_archive_name, config.backup_dir)
      ensure
        unless options[:preserve]
          log "removing #{local_encrypted_archive_name}"
          # rm_f: don't mask an upload failure when the file was never created
          FileUtils.rm_f(local_encrypted_archive_name)
        end
      end
    end

    desc 'status', 'display status of current exported backups'
    method_option 'date', :desc => 'unpack export for date'
    method_option 'redis_key_prefix', :desc => 'redis_key_prefix for monitoring keys'
    def status_task
      config.load(options)

      remote_backup = find_last_backup(config.backup_prefix, backup_suffix, config.backup_dir)

      abort no_backups_message unless remote_backup
      log("last backup %s at %s (%s)" % [remote_backup.name, Time.at(remote_backup.mtime).utc.iso8601, bytes(remote_backup.size)])

      update_redis_counters(remote_backup, options[:redis_key_prefix]) if options[:redis_key_prefix]
    end

    private

    def logger
      @logger ||= Logger.new(STDOUT)
    end

    # Route diagnostics to the logger in debug mode, otherwise to Thor's
    # +say+ unless --quiet was given.
    def log(*a)
      if options[:debug]
        logger.info(*a)
      else
        say(*a) unless options[:quiet]
      end
    end

    # Timestamp fragment embedded in archive names; honors --date.
    def export_date_format
      (options[:date] ? Date.parse(options[:date]) : Time.now).strftime("%Y-%m-%d-%H-%M")
    end

    def export_base_name
      "#{config.backup_prefix}_#{export_date_format}_db"
    end

    def export_archive_name
      if config.csv_enabled?
        [export_base_name, 'csv', 'tar'].join('.')
      else
        [export_base_name, 'sql'].join('.')
      end
    end

    def encrypted_export_archive_name
      [export_archive_name, 'gz', 'enc'].join('.')
    end

    # Filename suffix (optionally date-scoped) used to match remote backups.
    def backup_suffix
      suffix =
        if config.csv_enabled?
          ['csv', 'tar', 'gz', 'enc']
        else
          ['sql', 'gz', 'enc']
        end
      suffix.unshift "#{options[:date]}*_db" if options[:date]
      suffix.join('.')
    end

    def no_backups_message
      if config.sftp_enabled?
        "No backups found in #{config.backup_dir} matching #{config.backup_prefix}.*#{backup_suffix}"
      else
        "No backups found in #{config.s3[:bucket]}/#{config.backup_dir} matching #{config.backup_prefix}.*#{backup_suffix}"
      end
    end

    # Human-readable size: largest binary unit with a nonzero integer value.
    def bytes(size)
      {GB: 30, MB: 20, KB: 10}.each do |prefix, pow|
        b = size >> pow
        return "#{b} #{prefix}" if b > 0
      end
      return "#{size} B"
    end

    # Publish last-backup timestamp/size gauges to redis under
    # "<prefix>:mysql[_csv]_last_backup_{timestamp,size}".
    def update_redis_counters(remote_backup, redis_key_prefix)
      kind = config.csv_enabled? ? 'mysql_csv' : 'mysql'
      counters = {
        "#{redis_key_prefix}:#{kind}_last_backup_timestamp" => remote_backup.mtime,
        "#{redis_key_prefix}:#{kind}_last_backup_size" => remote_backup.size
      }

      counters.each do |key, val|
        log "setting redis #{key} to #{val}"
        redis.set(key, val)
      end
    end
  end
end