dump_truck 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in dump_truck.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Allen Madsen
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,94 @@
1
+ # DumpTruck
2
+
3
+ A simple DSL to specify how to dump data from a production environment for use in a developer environment. Provides tools to limit what data is pulled and obfuscate data that is pulled.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'dump_truck'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install dump_truck
18
+
19
+ ## Usage
20
+
21
+ Create a ruby file that looks like this:
22
+
23
+ ``` ruby
24
+ require 'logger'
25
+
26
+ DumpTruck.configure do
27
+ # You can optionally provide a logger. If no logger is provided,
28
+ # information will be logged to standard out.
29
+ logger Logger.new('dump.log')
30
+
31
+ # You can specify an arbitrary number of databases to connect to
32
+ database(:mysql) do
33
+ # Where and how to connect
34
+ hostname ENV['hostname']
35
+ user ENV['user']
36
+ password ENV['password'] # pass nil if there is no password
37
+
38
+ # The schemas to dump from this database.
39
+ schema 'app_production' do
40
+ # Directory to place the resulting sql dump
41
+ target_path ENV['target_path']
42
+
43
+ # File name for this schema sql dump. '.sql' is appended
44
+ # automatically to the name. If unspecified, the name of the schema
45
+ # is used.
46
+ target_file{|schema| schema + Time.now.strftime("_%Y%m%d%H%M%S")}
47
+
48
+ # Table rules can be defined to have one of four modes:
49
+ # * keep - all data for that table is dumped
50
+ # * keep 'some query' - data that satifies the query is dumped
51
+ # * truncate - no data is dumped
52
+ # * ignore - the table and data are not dumped
53
+ #
54
+ # Tables also define obfuscation rules. If no rule is defined for
55
+ # a column, the value is dumped as is. Obfuscation rules can
56
+ # optionally receive the value and a number which will be unique
57
+ # for each row of that table. If an obfuscation rule is defined for
58
+ # a field that doesn't exist for the table, it is ignored. This
59
+ # is most useful for the default table rule, because it allows
60
+ # obfuscation of all email fields for tables that have them.
61
+ #
62
+ # The default table rule is applied to any table that does not have
63
+ # a rule defined. Otherwise, it behaves like any other table rule.
64
+
65
+ table_default do
66
+ keep "created_at > now() - interval 6 month"
67
+
68
+ obfuscate(:email){|email, n| "my.email+#{n}@gmail.com"}
69
+ end
70
+
71
+ table(:users) do
72
+ keep
73
+
74
+ obfuscate(:email){|email, n| "my.email+#{n}@gmail.com"}
75
+ obfuscate(:password){'password'}
76
+ obfuscate(:name){|name| name.split('').shuffle.join}
77
+ end
78
+
79
+ table(:delayed_jobs){keep "deleted_at is null"}
80
+ table(:roles){keep}
81
+ table(:emails){truncate}
82
+ table(:credit_cards){ignore}
83
+ end
84
+ end
85
+ end
86
+ ```
87
+
88
+ ## Contributing
89
+
90
+ 1. Fork it ( http://github.com/secondrotation/dump_truck/fork )
91
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
92
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
93
+ 4. Push to the branch (`git push origin my-new-feature`)
94
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Register the :spec task provided by RSpec's rake integration.
RSpec::Core::RakeTask.new(:spec)

# Make :spec the task that runs when rake is invoked without arguments.
task default: :spec
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'dump_truck/version'
5
+
6
# Gem metadata for dump_truck. The version is read from DumpTruck::VERSION,
# which is loaded by the require above this block.
Gem::Specification.new do |gem|
  gem.name        = 'dump_truck'
  gem.version     = DumpTruck::VERSION
  gem.authors     = ['Allen Madsen']
  gem.email       = ['blatyo@gmail.com']
  gem.summary     = 'DSL to dump production data for developer use.'
  gem.description = 'DSL to dump production data for developer use. Allows selection of data and obfuscation.'
  gem.homepage    = 'https://github.com/secondrotation/dump_truck'
  gem.license     = 'MIT'

  # The packaged file list is whatever git currently tracks.
  tracked_files = `git ls-files`.split($/)
  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_runtime_dependency 'colorize'

  gem.add_development_dependency 'bundler', '~> 1.5'
  gem.add_development_dependency 'rake'
  gem.add_development_dependency 'rspec'
  gem.add_development_dependency 'pry-debugger'
end
data/lib/dump_truck.rb ADDED
@@ -0,0 +1,64 @@
1
+ require 'dump_truck/version'
2
+
3
+ require 'colorize'
4
+
5
+ require 'dump_truck/configuration'
6
+ require 'dump_truck/database_configuration'
7
+ require 'dump_truck/schema_configuration'
8
+ require 'dump_truck/table_configuration'
9
+
10
+ require 'dump_truck/target'
11
+ require 'dump_truck/truck'
12
+ require 'dump_truck/loggable_truck'
13
+ require 'dump_truck/mysql'
14
+
15
# Entry point: builds a Configuration from the given block and dumps every
# schema of every configured database.
class DumpTruck
  attr_reader :config

  # The block is handed to Configuration.new unchanged.
  def initialize(&block)
    @config = Configuration.new(&block)
  end

  # Dumps each configured database in turn.
  def dump
    config.each_database { |db_config| dump_database(db_config) }
  end

  private

  # Builds one truck per schema first, then starts one thread per truck and
  # finally waits for all of them to finish.
  def dump_database(db_config)
    trucks  = db_config.each_schema.map { |schema_config| truck(db_config, schema_config) }
    workers = trucks.map { |schema_truck| Thread.new(schema_truck) { |t| t.dump } }
    workers.each { |worker| worker.join }
  end

  # Assembles the logging truck for a single schema of a database.
  def truck(db_config, schema_config)
    db_type    = db_config.type
    translator = translator_for(db_type)
    client     = client_for(db_type, db_config, schema_config)

    LoggableTruck.new(schema_config, client, translator, config.logger)
  end

  # Returns the translator for the database type; only :mysql is known.
  def translator_for(type)
    raise "Unknown type #{type}" unless type.to_sym == :mysql

    Mysql::Translator.new
  end

  # Returns the client for the database type; only :mysql is known.
  def client_for(type, db_config, schema_config)
    raise "Unknown type #{type}" unless type.to_sym == :mysql

    Mysql::Client.new(db_config, schema_config)
  end
end
@@ -0,0 +1,35 @@
1
+ require 'logger'
2
+
3
class DumpTruck
  # Top-level DSL object. Holds the logger and the list of database
  # configurations declared inside the configuration block.
  class Configuration
    attr_reader :databases

    # Creates a configuration with a default logger that writes to standard
    # out, then evaluates the optional block with +self+ set to the new
    # configuration so that +logger+ and +database+ can be called bare.
    #
    # The block is captured explicitly: block-less Proc.new no longer picks up
    # the caller's block and raises ArgumentError on Ruby 3.0 and later.
    def initialize(&block)
      @databases = []
      @logger = Logger.new($stdout)
      @logger.formatter = proc do |_severity, datetime, _progname, msg|
        "#{datetime.strftime('%Y-%m-%d %H:%M:%S').blue} #{msg}\n"
      end

      instance_eval(&block) if block
    end

    # Combined getter/setter. With an argument, replaces the logger; in both
    # cases returns the logger currently in effect.
    def logger(logger = nil)
      @logger = logger || @logger
    end

    # Declares a database of the given type. The type is normalised to a
    # lower-case symbol and the block is forwarded to DatabaseConfiguration.
    def database(type, &block)
      type = type.to_s.downcase.to_sym

      @databases << DatabaseConfiguration.new(type, &block)
    end

    # Yields each database configuration, or returns an Enumerator when no
    # block is given.
    def each_database(&block)
      @databases.each(&block)
    end
  end
end
@@ -0,0 +1,61 @@
1
class DumpTruck
  # DSL object describing one database server: how to reach it and which
  # schemas to dump from it.
  class DatabaseConfiguration
    attr_reader :type

    # Stores the database type and evaluates the optional block with +self+
    # set to the new object so the setters below can be called bare.
    #
    # The block is captured explicitly: block-less Proc.new raises
    # ArgumentError on Ruby 3.0 and later.
    def initialize(type, &block)
      @type = type
      @schemas = {}

      instance_eval(&block) if block
    end

    # Each of the following is a combined getter/setter: called with a value
    # it stores it, called without one (or with nil) it leaves the stored
    # value untouched. In both cases the current value is returned.

    def ssh_user(ssh_user = nil)
      @ssh_user = ssh_user || @ssh_user
    end

    def ssh_hostname(ssh_hostname = nil)
      @ssh_hostname = ssh_hostname || @ssh_hostname
    end

    def user(user = nil)
      @user = user || @user
    end

    def password(password = nil)
      @password = password || @password
    end

    def hostname(hostname = nil)
      @hostname = hostname || @hostname
    end

    # Declares a schema to dump. The name is stored as a String; declaring
    # the same name twice replaces the earlier declaration.
    def schema(name, &block)
      name = name.to_s
      @schemas[name] = SchemaConfiguration.new(name, &block)
    end

    # Schema configurations in declaration order.
    def schemas
      @schemas.values
    end

    # Yields each schema configuration, or returns an Enumerator when no
    # block is given.
    def each_schema(&block)
      schemas.each(&block)
    end

    # Two configurations are equal when type, user, password, hostname and
    # schemas all match. Objects of another class are never equal.
    def ==(other)
      other.is_a?(DatabaseConfiguration) &&
        type == other.type && # was `type = other.type`, which skipped the comparison
        user == other.user &&
        password == other.password &&
        hostname == other.hostname &&
        schemas == other.schemas
    end

    def to_s
      "<DumpTruck::DatabaseConfiguration(#{type}) (#{schemas.map(&:to_s).join(', ')})>"
    end
  end
end
@@ -0,0 +1,41 @@
1
class DumpTruck
  # Truck subclass that wraps the overridden steps with progress messages on
  # the given logger and keeps a running count of rows seen per table.
  # The dumping itself is delegated to the superclass via +super+.
  class LoggableTruck < Truck
    def initialize(schema_config, client, translator, logger)
      super(schema_config, client, translator)
      @logger = logger
      @processed_rows = 0
    end

    protected

    # Announces the schema, then defers to the superclass.
    def extract_tables
      @logger.info("Getting tables from #{colored_schema_name}")
      super
    end

    # Logs the table name returned by the superclass and passes it through.
    def extract_table(line)
      super.tap do |table|
        @logger.info("=> #{table.green} in #{colored_schema_name}")
      end
    end

    # Logs before and after the superclass dumps the schema.
    def dump_schema(tables)
      @logger.info("Dumping data from #{colored_schema_name}")
      super
      @logger.info("Dumped data from #{colored_schema_name}")
    end

    # Logs before and after the superclass dumps one table, reports the rows
    # counted by #extract_insert in between, then resets the counter.
    def dump_data(config, table, target)
      @logger.info("=> Dumping #{table.green} in #{colored_schema_name}")
      super
      row_count = @processed_rows.to_s.green
      @logger.info("=> Dumped #{row_count} rows from #{table.green} in #{colored_schema_name}")
      @processed_rows = 0
    end

    # Adds the size of the data part returned by the superclass to the row
    # counter and returns the fields/data pair.
    def extract_insert(line)
      insert_fields, insert_data = super
      @processed_rows += insert_data.size
      [insert_fields, insert_data]
    end

    private

    # Schema name coloured for log output.
    def colored_schema_name
      schema_config.name.blue
    end
  end
end
@@ -0,0 +1,7 @@
1
class DumpTruck
  # Namespace for the MySQL-specific classes loaded by the requires below.
  module Mysql; end
end
5
+
6
+ require 'dump_truck/mysql/client'
7
+ require 'dump_truck/mysql/translator'
@@ -0,0 +1,73 @@
1
require 'shellwords'
require 'tempfile'

class DumpTruck
  module Mysql
    # Builds and runs the mysqldump command lines for one schema.
    #
    # Connection credentials are written once to a temporary defaults file
    # that is passed to mysqldump via --defaults-extra-file; they are not
    # kept on the object and do not appear on the command line.
    class Client
      # connection_config must respond to ssh_user, ssh_hostname, hostname,
      # user and password; schema_config must respond to name.
      def initialize(connection_config, schema_config)
        @ssh_user = connection_config.ssh_user
        @ssh_hostname = connection_config.ssh_hostname
        @schema = schema_config.name

        write_defaults_file(connection_config)
      end

      # Runs the structure-only dump and passes the pipe to the block
      # (see IO.popen for the block and no-block return values).
      def tables_dump(&block)
        IO.popen(tables_dump_command, &block)
      end

      # Runs the dump for a single table according to table_config and
      # passes the pipe to the block.
      def data_dump(table_config, table, &block)
        IO.popen(data_dump_command(table_config, table), &block)
      end

      private

      attr_reader :ssh_user, :ssh_hostname, :schema, :file

      def cnf_file
        file.path
      end

      # Writes the [mysqldump] option group. The password line is left out
      # when the password is nil or empty. The Tempfile is kept in @file so
      # it stays on disk for as long as this client is alive.
      def write_defaults_file(connection_config)
        password = connection_config.password

        settings = ['[mysqldump]', "user = #{connection_config.user}"]
        settings << "password = #{password}" unless password.nil? || password.empty?
        settings << "host = #{connection_config.hostname}"
        settings.concat(%w[hex-blob no-create-db skip-comments single-transaction complete-insert compress])
        settings << 'net_buffer_length = 1048576'

        @file = Tempfile.new('my.cnf')
        @file.write(settings.join("\n") << "\n")
        @file.close
      end

      def tables_dump_command
        mysqldump(
          '--no-data', '--no-set-names', '--no-tablespaces', '--skip-add-drop-table',
          '--skip-add-locks', '--skip-set-charset', Shellwords.escape(schema.to_s)
        )
      end

      def data_dump_command(config, table)
        mysqldump(
          data_flag(config), where_flag(config),
          Shellwords.escape(schema.to_s), Shellwords.escape(table.to_s)
        )
      end

      # Joins the optional ssh prefix, the mysqldump invocation and the given
      # (already shell-safe) arguments into one command string, skipping
      # flags that are empty.
      def mysqldump(*args)
        [ssh_connection, 'mysqldump', "--defaults-extra-file=#{Shellwords.escape(cnf_file)}", *args]
          .compact.reject(&:empty?).join(' ')
      end

      # ssh prefix when an ssh hostname is configured, otherwise nil.
      def ssh_connection
        return unless ssh_hostname

        login = ssh_user ? "#{ssh_user}@" : ''
        "ssh -C #{login}#{ssh_hostname}"
      end

      def data_flag(config)
        config.mode == :none ? '--no-data' : ''
      end

      # The query is shell-escaped rather than wrapped in single quotes, so
      # SQL containing quotes (e.g. role = 'admin') reaches mysqldump intact.
      def where_flag(config)
        config.mode == :some ? "--where=#{Shellwords.escape(config.query.to_s)}" : ''
      end
    end
  end
end
73
+