dump_truck 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +94 -0
- data/Rakefile +6 -0
- data/dump_truck.gemspec +27 -0
- data/lib/dump_truck.rb +64 -0
- data/lib/dump_truck/configuration.rb +35 -0
- data/lib/dump_truck/database_configuration.rb +61 -0
- data/lib/dump_truck/loggable_truck.rb +41 -0
- data/lib/dump_truck/mysql.rb +7 -0
- data/lib/dump_truck/mysql/client.rb +73 -0
- data/lib/dump_truck/mysql/translator.rb +123 -0
- data/lib/dump_truck/schema_configuration.rb +55 -0
- data/lib/dump_truck/table_configuration.rb +63 -0
- data/lib/dump_truck/target.rb +13 -0
- data/lib/dump_truck/truck.rb +70 -0
- data/lib/dump_truck/version.rb +3 -0
- data/spec/configuration_spec.rb +22 -0
- data/spec/databases_configuration_spec.rb +29 -0
- data/spec/dump_truck_spec.rb +39 -0
- data/spec/fixtures/mysql/expected_dump.sql +147 -0
- data/spec/fixtures/mysql/permissions.sql +38 -0
- data/spec/fixtures/mysql/roles.sql +39 -0
- data/spec/fixtures/mysql/tables.sql +111 -0
- data/spec/fixtures/mysql/users.sql +71 -0
- data/spec/mysql/translator_spec.rb +115 -0
- data/spec/schema_configuration_spec.rb +44 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/table_configuration_spec.rb +124 -0
- data/spec/truck_spec.rb +51 -0
- metadata +179 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Allen Madsen
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# DumpTruck
|
2
|
+
|
3
|
+
A simple DSL to specify how to dump data from a production environment for use in a developer environment. Provides tools to limit what data is pulled and obfuscate data that is pulled.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'dump_truck'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install dump_truck
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
Create a ruby file that looks like this:
|
22
|
+
|
23
|
+
``` ruby
|
24
|
+
require 'logger'
|
25
|
+
|
26
|
+
DumpTruck.configure do
|
27
|
+
# You can optionally provide a logger. If no logger is provided,
|
28
|
+
# information will be logged to standard out.
|
29
|
+
logger Logger.new('dump.log')
|
30
|
+
|
31
|
+
# You can specify an arbitrary number of databases to connect to
|
32
|
+
database(:mysql) do
|
33
|
+
# Where and how to connect
|
34
|
+
hostname ENV['hostname']
|
35
|
+
user ENV['user']
|
36
|
+
password ENV['password'] # pass nil if there is no password
|
37
|
+
|
38
|
+
# The schemas to dump from this database.
|
39
|
+
schema 'app_production' do
|
40
|
+
# Directory to place the resulting sql dump
|
41
|
+
target_path ENV['target_path']
|
42
|
+
|
43
|
+
# File name for this schema sql dump. '.sql' is appended
|
44
|
+
# automatically to the name. If unspecified, the name of the schema
|
45
|
+
# is used.
|
46
|
+
target_file{|schema| schema + Time.now.strftime("_%Y%m%d%H%M%S")}
|
47
|
+
|
48
|
+
# Table rules can be defined to have one of four modes:
|
49
|
+
# * keep - all data for that table is dumped
|
50
|
+
# * keep 'some query' - data that satisfies the query is dumped
|
51
|
+
# * truncate - no data is dumped
|
52
|
+
# * ignore - the table and data are not dumped
|
53
|
+
#
|
54
|
+
# Tables also define obfuscation rules. If no rule is defined for
|
55
|
+
# a column, the value is dumped as is. Obfuscation rules can
|
56
|
+
# optionally receive the value and a number which will be unique
|
57
|
+
# for each row of that table. If an obfuscation rule is defined for
|
58
|
+
# a field that doesn't exist for the table, it is ignored. This
|
59
|
+
# is most useful for the default table rule, because it allows
|
60
|
+
# obfuscation of all email fields for tables that have them.
|
61
|
+
#
|
62
|
+
# The default table rule is applied to any table that does not have
|
63
|
+
# a rule defined. Otherwise, it behaves like any other table rule.
|
64
|
+
|
65
|
+
table_default do
|
66
|
+
keep "created_at > now() - interval 6 month"
|
67
|
+
|
68
|
+
obfuscate(:email){|email, n| "my.email+#{n}@gmail.com"}
|
69
|
+
end
|
70
|
+
|
71
|
+
table(:users) do
|
72
|
+
keep
|
73
|
+
|
74
|
+
obfuscate(:email){|email, n| "my.email+#{n}@gmail.com"}
|
75
|
+
obfuscate(:password){'password'}
|
76
|
+
obfuscate(:name){|name| name.split('').shuffle.join}
|
77
|
+
end
|
78
|
+
|
79
|
+
table(:delayed_jobs){keep "deleted_at is null"}
|
80
|
+
table(:roles){keep}
|
81
|
+
table(:emails){truncate}
|
82
|
+
table(:credit_cards){ignore}
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
```
|
87
|
+
|
88
|
+
## Contributing
|
89
|
+
|
90
|
+
1. Fork it ( http://github.com/secondrotation/dump_truck/fork )
|
91
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
92
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
93
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
94
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/dump_truck.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'dump_truck/version'
|
5
|
+
|
6
|
+
# Gem packaging metadata for dump_truck.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'dump_truck'
  spec.version     = DumpTruck::VERSION
  spec.authors     = ['Allen Madsen']
  spec.email       = ['blatyo@gmail.com']
  spec.summary     = 'DSL to dump production data for developer use.'
  spec.description = 'DSL to dump production data for developer use. Allows selection of data and obfuscation.'
  spec.homepage    = 'https://github.com/secondrotation/dump_truck'
  spec.license     = 'MIT'

  # Packaged files are whatever git tracks; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Runtime: colorize supplies the String colour helpers used in log output.
  spec.add_runtime_dependency 'colorize'

  # Development-only tooling.
  spec.add_development_dependency 'bundler', '~> 1.5'
  %w[rake rspec pry-debugger].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
|
data/lib/dump_truck.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'dump_truck/version'
|
2
|
+
|
3
|
+
require 'colorize'
|
4
|
+
|
5
|
+
require 'dump_truck/configuration'
|
6
|
+
require 'dump_truck/database_configuration'
|
7
|
+
require 'dump_truck/schema_configuration'
|
8
|
+
require 'dump_truck/table_configuration'
|
9
|
+
|
10
|
+
require 'dump_truck/target'
|
11
|
+
require 'dump_truck/truck'
|
12
|
+
require 'dump_truck/loggable_truck'
|
13
|
+
require 'dump_truck/mysql'
|
14
|
+
|
15
|
+
# Entry point: evaluates a configuration block and dumps every configured
# database according to it.
class DumpTruck
  attr_reader :config

  # The given block is handed to Configuration, which evaluates it as the DSL.
  def initialize(&block)
    @config = Configuration.new(&block)
  end

  # Dumps the configured databases one after another.
  def dump
    config.each_database { |db_config| dump_database(db_config) }
  end

  private

  # Builds one truck per schema, then runs every truck on its own thread and
  # waits for all of them. All trucks are built before any thread starts.
  def dump_database(db_config)
    trucks = db_config.each_schema.map do |schema_config|
      truck(db_config, schema_config)
    end

    workers = trucks.map do |schema_truck|
      Thread.new(schema_truck) { |loaded| loaded.dump }
    end

    workers.each { |worker| worker.join }
  end

  # Assembles a logging truck for one schema of one database.
  def truck(db_config, schema_config)
    db_type = db_config.type

    LoggableTruck.new(
      schema_config,
      client_for(db_type, db_config, schema_config),
      translator_for(db_type),
      config.logger
    )
  end

  # Returns the translator for the database type; raises for unknown types.
  def translator_for(type)
    return Mysql::Translator.new if type.to_sym == :mysql

    raise "Unknown type #{type}"
  end

  # Returns the client for the database type; raises for unknown types.
  def client_for(type, db_config, schema_config)
    return Mysql::Client.new(db_config, schema_config) if type.to_sym == :mysql

    raise "Unknown type #{type}"
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
class DumpTruck
  # Root of the configuration DSL: holds the logger and the list of database
  # configurations declared inside the block given to the constructor.
  class Configuration
    attr_reader :databases

    # Sets up a default logger writing to standard out, then evaluates the
    # optional block in the context of this object so DSL calls such as
    # `logger` and `database` resolve to the methods below.
    #
    # The block is captured explicitly: a block-less `Proc.new` no longer
    # picks up the caller's block on Ruby 3.0+ and raises ArgumentError.
    def initialize(&block)
      @databases = []
      @logger = Logger.new($stdout)
      @logger.formatter = proc do |_severity, datetime, _progname, msg|
        "#{datetime.strftime('%Y-%m-%d %H:%M:%S').blue} #{msg}\n"
      end

      instance_eval(&block) if block
    end

    # Combined getter/setter: with an argument, replaces the logger; without
    # one, returns the current logger.
    def logger(logger = nil)
      @logger = logger || @logger
    end

    # Declares a database of the given type (normalised to a lowercase
    # symbol); the block is passed on to DatabaseConfiguration.
    def database(type, &block)
      type = type.to_s.downcase.to_sym

      @databases << DatabaseConfiguration.new(type, &block)
    end

    # Yields each database configuration, or returns an Enumerator when no
    # block is given.
    def each_database(&block)
      @databases.each(&block)
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
class DumpTruck
  # DSL object describing one database: how to connect to it and which
  # schemas to dump from it.
  class DatabaseConfiguration
    attr_reader :type

    # @param type the database type as given by the caller
    # The optional block is evaluated in the context of this object. It is
    # captured explicitly because a block-less `Proc.new` raises
    # ArgumentError on Ruby 3.0+.
    def initialize(type, &block)
      @type = type
      @schemas = {}

      instance_eval(&block) if block
    end

    # The accessors below are combined getter/setters: called with a value
    # they store it, called without one they return the stored value.

    def ssh_user(ssh_user = nil)
      @ssh_user = ssh_user || @ssh_user
    end

    def ssh_hostname(ssh_hostname = nil)
      @ssh_hostname = ssh_hostname || @ssh_hostname
    end

    def user(user = nil)
      @user = user || @user
    end

    def password(password = nil)
      @password = password || @password
    end

    def hostname(hostname = nil)
      @hostname = hostname || @hostname
    end

    # Declares a schema to dump. Declaring the same name again replaces the
    # earlier definition, since schemas are keyed by their string name.
    def schema(name, &block)
      name = name.to_s
      @schemas[name] = SchemaConfiguration.new(name, &block)
    end

    # All declared schema configurations, in declaration order.
    def schemas
      @schemas.values
    end

    # Yields each schema configuration, or returns an Enumerator when no
    # block is given.
    def each_schema(&block)
      schemas.each(&block)
    end

    # Two configurations are equal when type, user, password, hostname and
    # schemas all match. The SSH settings are not part of the comparison.
    #
    # The type check uses `==`; the previous `type = other.type` was an
    # assignment, so configurations of different types compared equal.
    # Anything that is not a DatabaseConfiguration is simply not equal.
    def ==(other)
      other.is_a?(DatabaseConfiguration) &&
        type == other.type &&
        user == other.user &&
        password == other.password &&
        hostname == other.hostname &&
        schemas == other.schemas
    end

    def to_s
      "<DumpTruck::DatabaseConfiguration(#{type}) (#{schemas.map(&:to_s).join(', ')})>"
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
class DumpTruck
  # Truck that reports progress to a logger. Each overridden step delegates
  # to the Truck implementation via `super` and logs around it.
  # NOTE(review): Truck is defined outside this file; the exact contract of
  # the overridden steps should be confirmed there.
  class LoggableTruck < Truck
    def initialize(schema_config, client, translator, logger)
      super(schema_config, client, translator)
      @logger = logger
      # Running count of rows seen for the table currently being dumped.
      @processed_rows = 0
    end

    protected

    # Announces the schema, then lets the parent extract its tables.
    def extract_tables
      @logger.info("Getting tables from #{schema_config.name.blue}")
      super
    end

    # Logs the table name produced by the parent and returns it unchanged.
    def extract_table(line)
      super.tap do |table|
        @logger.info("=> #{table.green} in #{schema_config.name.blue}")
      end
    end

    # Brackets the parent's schema dump with start and finish messages.
    def dump_schema(tables)
      @logger.info("Dumping data from #{schema_config.name.blue}")
      super
      @logger.info("Dumped data from #{schema_config.name.blue}")
    end

    # Brackets the parent's table dump with log messages, reports how many
    # rows were counted, and resets the counter for the next table.
    def dump_data(config, table, target)
      @logger.info("=> Dumping #{table.green} in #{schema_config.name.blue}")
      super
      @logger.info("=> Dumped #{@processed_rows.to_s.green} rows from #{table.green} in #{schema_config.name.blue}")
      @processed_rows = 0
    end

    # Adds the number of data entries the parent extracted to the row counter
    # and passes the extracted pair along.
    def extract_insert(line)
      columns, rows = super
      @processed_rows += rows.size
      [columns, rows]
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
class DumpTruck
  module Mysql
    # Runs mysqldump (optionally through ssh) for one schema and streams its
    # output to the caller. Connection settings are written to a temporary
    # defaults file that is passed to mysqldump via --defaults-extra-file.
    class Client
      # @param connection_config object responding to ssh_user, ssh_hostname,
      #   hostname, user and password
      # @param schema_config object responding to name
      def initialize(connection_config, schema_config)
        @ssh_user = connection_config.ssh_user
        @ssh_hostname = connection_config.ssh_hostname
        @schema = schema_config.name

        write_defaults_file(connection_config)
      end

      # Runs the structure-only dump and yields the command's output stream
      # to the block, returning the block's value.
      #
      # The block is captured explicitly: `IO.popen(cmd, &Proc.new)` relied
      # on a block-less Proc.new, which raises ArgumentError on Ruby 3.0+.
      def tables_dump(&block)
        IO.popen(tables_dump_command, &block)
      end

      # Runs the dump for a single table, honouring the table configuration's
      # mode and query, and yields the command's output stream to the block.
      def data_dump(table_config, table, &block)
        IO.popen(data_dump_command(table_config, table), &block)
      end

      private
      attr_reader :ssh_user, :ssh_hostname, :hostname, :user, :password, :schema, :file

      # Path of the generated defaults file.
      def cnf_file
        file.path
      end

      # Writes the [mysqldump] defaults file. The gsub strips leading
      # whitespace from every line, which also removes the line left blank
      # when no password is configured.
      def write_defaults_file(connection_config)
        hostname = connection_config.hostname
        user     = connection_config.user
        password = connection_config.password

        @file = Tempfile.new('my.cnf')
        @file.write <<-MYCNF.gsub(/^\s+/, '')
          [mysqldump]
          user = #{user}
          #{"password = #{password}" if !password.nil? && !password.empty?}
          host = #{hostname}
          hex-blob
          no-create-db
          skip-comments
          single-transaction
          complete-insert
          compress
          net_buffer_length = 1048576
        MYCNF
        @file.close
      end

      def tables_dump_command
        "#{ssh_connection} mysqldump --defaults-extra-file=#{cnf_file} --no-data --no-set-names --no-tablespaces --skip-add-drop-table --skip-add-locks --skip-set-charset #{schema}"
      end

      def data_dump_command(config, table)
        "#{ssh_connection} mysqldump --defaults-extra-file=#{cnf_file} #{data_flag(config)} #{where_flag(config)} #{schema} #{table}"
      end

      # ssh prefix for the command, or nil (interpolated as an empty string)
      # when no ssh host is configured.
      def ssh_connection
        user_string = "#{@ssh_user}@" if @ssh_user

        "ssh -C #{user_string}#{@ssh_hostname} " if @ssh_hostname
      end

      # Mode :none dumps the table structure only.
      def data_flag(config)
        config.mode == :none ? '--no-data' : ''
      end

      # Mode :some restricts the dump to rows matching the configured query.
      # NOTE(review): the query is interpolated inside single quotes without
      # escaping, so a query containing a single quote breaks the command.
      def where_flag(config)
        config.mode == :some ? "--where='#{config.query}'" : ''
      end
    end
  end
end
|
73
|
+
|