toku 0.1.0.4.3 → 0.1.0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tasks/generate_config.rake +10 -3
- data/lib/toku.rb +77 -46
- data/lib/toku/filters/column/nullify.rb +9 -0
- data/lib/toku/filters/column/obfuscate.rb +9 -0
- data/lib/toku/version.rb +1 -1
- data/toku.gemspec +1 -0
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d14fc10f4732aaa04b7d3fab0da39e55eb35c0d9
|
4
|
+
data.tar.gz: 72a5299d37764a11eb1556f3125b8fae80c8518e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b132102fd9d06dfcb2eda58fa76801bbf26a0f7990e0b4df2a5a035aa8b276d9e278b56d22a0e844ad4ee4da3b2af10481bce5cb7e0673a7c14d00b42bfb9dd6
|
7
|
+
data.tar.gz: f78d11cfa53c5502e818f9c6e03c1f324a489df3da064bda01b7df58e3e6feabce933cc03743e5fdaa7013c3d6772805e4941c56761a06c8d1111ea99f02d389
|
@@ -1,18 +1,25 @@
|
|
1
1
|
require 'yaml'

# Run only in a Rails environment.
#
# Run this task to generate a base config file for your Rails application.
# Once it has run, it is up to you to review the generated file and whitelist
# the specific columns that may be copied without obfuscation.
namespace :toku do
  desc 'Generate base config file from your base database schema in Rails'
  task genesis_rails: :environment do
    path = Rails.root.join('tmp').join('config.yml')
    connection = ActiveRecord::Base.connection

    # Build the whole configuration before touching the file so that a failure
    # while inspecting the schema does not leave an empty config.yml behind.
    config = connection.tables.each_with_object({}) do |table, tables|
      columns = connection.columns(table).map(&:name).each_with_object({}) do |column, filters|
        # Only the primary key, foreign keys (*_id) and timestamps (*_at) are
        # passed through untouched. Matching on a bare "id" suffix would also
        # whitelist columns such as "paid" or "android", so the underscore is
        # required; every other column is obfuscated by default.
        passthrough = column == 'id' || column.end_with?('_id', '_at')
        filters[column] = [passthrough ? 'none' : 'obfuscate']
      end
      tables[table] = { 'columns' => columns, 'rows' => [] }
    end

    File.write(path, config.to_yaml)
    puts "Pickup your config file in #{path}"
  end
end
|
data/lib/toku.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
require "toku/version"
require 'csv'
require 'erb'
require 'fileutils'
require "uri"
require 'yaml'
require 'sequel'
require 'concurrent'
|
3
6
|
|
4
7
|
Dir[File.dirname(__FILE__) + "/toku/**/*.rb"].each { |file| require file }
|
5
8
|
|
@@ -13,7 +16,9 @@ module Toku
|
|
13
16
|
none: Toku::ColumnFilter::Passthrough,
|
14
17
|
faker_last_name: Toku::ColumnFilter::FakerLastName,
|
15
18
|
faker_first_name: Toku::ColumnFilter::FakerFirstName,
|
16
|
-
faker_email: Toku::ColumnFilter::FakerEmail
|
19
|
+
faker_email: Toku::ColumnFilter::FakerEmail,
|
20
|
+
obfuscate: Toku::ColumnFilter::Obfuscate,
|
21
|
+
nullify: Toku::ColumnFilter::Nullify
|
17
22
|
}
|
18
23
|
|
19
24
|
# A few default row filters mappings
|
@@ -21,11 +26,14 @@ module Toku
|
|
21
26
|
drop: Toku::RowFilter::Drop
|
22
27
|
}
|
23
28
|
|
24
|
-
|
29
|
+
# Number of worker threads used to copy tables: twice the processor count
# reported by concurrent-ruby. Integers are immutable, so no freeze is needed.
THREADPOOL_SIZE = Concurrent.processor_count * 2

# Relative path of the temporary file holding the schema dump of the source
# database. Frozen so the shared constant cannot be mutated in place.
SCHEMA_DUMP_PATH = "tmp/toku_source_schema_dump.sql".freeze
|
25
32
|
|
26
33
|
# @param [String] config_file_path path of config file
|
27
34
|
def initialize(config_file_path, column_filters = {}, row_filters = {})
|
28
|
-
@config = YAML.load(File.read(config_file_path))
|
35
|
+
@config = YAML.load(ERB.new(File.read(config_file_path)).result)
|
36
|
+
@threadpool = Concurrent::FixedThreadPool.new(THREADPOOL_SIZE)
|
29
37
|
self.column_filters = column_filters.merge(COLUMN_FILTER_MAP)
|
30
38
|
self.row_filters = row_filters.merge(ROW_FILTER_MAP)
|
31
39
|
Sequel::Database.extension(:pg_streaming)
|
@@ -35,76 +43,99 @@ module Toku
|
|
35
43
|
# @param uri_db_destination [String] URI of the destination DB
|
36
44
|
# @return [void]
|
37
45
|
def run(uri_db_source, uri_db_destination)
  # Overview:
  #   1. dump the schema of the source database to SCHEMA_DUMP_PATH,
  #   2. drop and recreate the destination database, then load that schema,
  #   3. check that every table without row filters has a filter configured
  #      for each of its columns,
  #   4. copy every table through the thread pool (see #process_table),
  #   5. always disconnect and remove the schema dump, even on failure.
  #
  # Raises Toku::ColumnFilterMissingError when step 3 fails, and re-raises
  # the first error raised by a table copy in step 4.
  begin_time_stamp = Time.now
  @global_count = 0
  source_db = Sequel.connect(uri_db_source)
  dump_schema(uri_db_source)
  parsed_destination_uri = URI(uri_db_destination)
  destination_db_name = parsed_destination_uri.path.tr("/", "")

  # The destination database is (re)created through a separate connection to
  # the template1 database of the same server.
  admin_db =
    Sequel.connect("postgres://#{parsed_destination_uri.user}:#{parsed_destination_uri.password}@#{parsed_destination_uri.host}:#{parsed_destination_uri.port || 5432}/template1")
  begin
    admin_db.run("DROP DATABASE IF EXISTS #{destination_db_name}")
    admin_db.run("CREATE DATABASE #{destination_db_name}")
  ensure
    admin_db.disconnect
  end

  destination_db = Sequel.connect(uri_db_destination)
  destination_db.run(File.read(SCHEMA_DUMP_PATH))
  destination_pool = Sequel::ThreadedConnectionPool.new(destination_db)
  source_pool = Sequel::ThreadedConnectionPool.new(source_db)

  # Validate the whole configuration before scheduling any copy, so that a
  # configuration error is reported while no worker is running yet.
  tables = source_db.tables
  tables.each do |t|
    if !row_filters?(t) && @config[t.to_s]['columns'].count < source_db.from(t).columns.count
      raise Toku::ColumnFilterMissingError
    end
  end

  # Errors raised inside a task posted to the thread pool never reach the
  # caller, so they are collected here (Queue is thread-safe) and re-raised
  # once all workers are done.
  failures = Queue.new
  tables.each do |t|
    @threadpool.post do
      begin
        destination_pool.hold do |destination_connection|
          source_pool.hold do |source_connection|
            process_table(t, source_connection.instance_variable_get(:@db), destination_connection.instance_variable_get(:@db))
          end
        end
      rescue StandardError => e
        failures << [t, e]
      end
    end
  end

  # NOTE(review): the pool is created in #initialize and shut down here, so
  # #run is presumably meant to be called once per instance - confirm.
  @threadpool.shutdown
  @threadpool.wait_for_termination

  errors = []
  errors << failures.pop until failures.empty?
  errors.each do |failed_table, error|
    warn "Toku: copying #{failed_table} failed: #{error.class}: #{error.message}"
  end
  raise errors.first.last unless errors.empty?

  puts "Toku: copied #{@global_count} elements in total and that took #{(Time.now - begin_time_stamp).round(2)} seconds with #{THREADPOOL_SIZE} green threads"
  nil
ensure
  # Release connections and remove the temporary dump whatever happened above.
  # rm_f is used because the dump may not exist if dump_schema itself failed.
  source_db.disconnect if source_db
  destination_db.disconnect if destination_db
  FileUtils.rm_f(SCHEMA_DUMP_PATH)
end
|
77
83
|
|
78
84
|
# Applies the configured column filters to every value of a row and
# serializes the result as one CSV line.
#
# Each column is configured with a list of filters applied in order, the
# output of one filter being the input of the next. A filter entry is either
# a name (String or Symbol), or a single-pair Hash mapping the name to the
# options handed to the filter.
#
# @param row [Hash] column name => value
# @param table_name [Symbol, String] table the row belongs to
# @return [String] the filtered row as a CSV line
# @raise [ArgumentError] when a filter entry is neither a name nor a Hash
def transform(row, table_name)
  column_config = @config[table_name.to_s]['columns']

  row.map do |row_key, row_value|
    column_config[row_key.to_s].inject(row_value) do |result, filter|
      name, options =
        case filter
        when Hash then filter.first
        when String, Symbol then [filter, {}]
        else
          # Previously such an entry was skipped and the value silently became
          # nil; a malformed configuration must be reported instead.
          raise ArgumentError, "Unsupported column filter #{filter.inspect} for #{table_name}.#{row_key}"
        end

      filter_class(column_filters, name.to_sym).new(result, options).call
    end
  end.to_csv
end
|
95
101
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
102
|
+
# Copies one table from the source to the destination database.
#
# Rows are streamed lazily from the source, passed through the row filters
# configured for the table (in order), transformed into CSV lines by
# #transform and bulk-loaded into the destination. Triggers of the
# destination table are disabled for the duration of the copy.
#
# @param table [Symbol] name of the table to copy
# @param source_connection [#[]] source database handle; the dataset it
#   returns for the table must respond to #stream
# @param destination_connection [#run, #copy_into, #[]] destination database handle
# @return [void]
# @raise [RuntimeError] when a configured row filter is unknown
# @raise [ArgumentError] when a row filter entry is neither a name nor a Hash
def process_table(table, source_connection, destination_connection)
  row_enumerator = source_connection[table].stream.lazy

  @config[table.to_s]['rows'].each do |f|
    name, options =
      case f
      when Hash then f.first
      when String, Symbol then [f, {}]
      else raise ArgumentError, "Unsupported row filter #{f.inspect} for #{table}"
      end

    # Resolve through filter_class, like column filters, so that an unknown
    # name yields an explicit error rather than a NoMethodError on nil.
    row_enumerator = filter_class(row_filters, name.to_sym).new(options).call(row_enumerator)
  end

  destination_connection.run("ALTER TABLE #{table} DISABLE TRIGGER ALL;")
  begin
    destination_connection.copy_into(table, data: row_enumerator.map { |row| transform(row, table) }, format: :csv)
  ensure
    # Re-enable the triggers even when the copy fails.
    destination_connection.run("ALTER TABLE #{table} ENABLE TRIGGER ALL;")
  end

  count = destination_connection[table].count
  # NOTE(review): #run calls this method from several pool threads and this
  # read-modify-write is not synchronized - the total may need a lock.
  @global_count += count
  puts "Toku: copied #{count} objects into #{table} #{count != 0 ? ':)' : ':|'}"
end
|
120
|
+
|
121
|
+
# Dumps the schema (no data) of a database into SCHEMA_DUMP_PATH by running
# the `pg_dump -s` command line tool.
#
# The password is read from the URI and falls back to the PGPASSWORD
# environment variable when the URI carries none.
#
# @param uri [String] URI of the database whose schema is dumped
# @return [void]
# @raise [RuntimeError] when pg_dump cannot be run or reports a failure
def dump_schema(uri)
  FileUtils.mkdir_p(File.dirname(SCHEMA_DUMP_PATH))

  parsed_uri = URI(uri)
  password = parsed_uri.password || ENV["PGPASSWORD"]
  db_name = parsed_uri.path.tr("/", "")

  command = ["pg_dump", "-s", "-p", (parsed_uri.port || 5432).to_s]
  command.push("-h", parsed_uri.host) if parsed_uri.host
  command.push("-U", parsed_uri.user) if parsed_uri.user
  command << db_name unless db_name.empty?

  # The command is passed as an argument list with an explicit environment
  # and output redirection, so no shell is involved and URI components
  # cannot be interpreted as shell syntax.
  environment = password ? { "PGPASSWORD" => password } : {}
  raise "pg_dump schema dump failed" unless system(environment, *command, out: SCHEMA_DUMP_PATH)
end
|
105
135
|
|
106
|
-
def
|
107
|
-
raise "
|
136
|
+
# Looks up a filter class by name in a filter mapping.
#
# @param type [Hash{Symbol => Class}] mapping of filter names to classes
# @param symbol [Symbol] name of the requested filter
# @return [Class] the class registered under that name
# @raise [RuntimeError] when nothing is registered under that name
def filter_class(type, symbol)
  registered = type[symbol]
  return registered unless registered.nil?

  raise "Please provide a filter for #{symbol}"
end
|
109
140
|
|
110
141
|
# Are there row filters specified for this table?
|
data/lib/toku/version.rb
CHANGED
data/toku.gemspec
CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_dependency 'sequel'
|
25
25
|
spec.add_dependency 'sequel_pg'
|
26
26
|
spec.add_dependency 'faker'
|
27
|
+
spec.add_dependency 'concurrent-ruby'
|
27
28
|
|
28
29
|
spec.add_development_dependency "bundler", "~> 1.13"
|
29
30
|
spec.add_development_dependency "rake", "~> 10.0"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: toku
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0.4.
|
4
|
+
version: 0.1.0.4.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- PSKL
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-02-
|
12
|
+
date: 2018-02-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: sequel
|
@@ -53,6 +53,20 @@ dependencies:
|
|
53
53
|
- - ">="
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: concurrent-ruby
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
56
70
|
- !ruby/object:Gem::Dependency
|
57
71
|
name: bundler
|
58
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -106,6 +120,8 @@ files:
|
|
106
120
|
- lib/toku/filters/column/faker_email.rb
|
107
121
|
- lib/toku/filters/column/faker_first_name.rb
|
108
122
|
- lib/toku/filters/column/faker_last_name.rb
|
123
|
+
- lib/toku/filters/column/nullify.rb
|
124
|
+
- lib/toku/filters/column/obfuscate.rb
|
109
125
|
- lib/toku/filters/column/passthrough.rb
|
110
126
|
- lib/toku/filters/column_filter.rb
|
111
127
|
- lib/toku/filters/row/drop.rb
|