toku 0.1.0.4.3 → 0.1.0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1bd1fc5c406f8607ac17212e37ccca92a608b5b7
4
- data.tar.gz: 6f75492ed9f5f4e78b813993c74a9264cc8fad3e
3
+ metadata.gz: d14fc10f4732aaa04b7d3fab0da39e55eb35c0d9
4
+ data.tar.gz: 72a5299d37764a11eb1556f3125b8fae80c8518e
5
5
  SHA512:
6
- metadata.gz: 370d647af3b547f00da42f68d6d8816d5195180821048aae7d876afa5eef98515e4c92c107f1f561d618000107e79456ac3eaa1f5bf3bb84b3d8d0ae45cc9e8b
7
- data.tar.gz: b0edd338a317f66fbdab1eb15f16950a1b17e72557b10725b29eb2598c4d9b577bb5416fdb5c073b2d86ed1922b478e2f7f63c7ed11898935a7b1cdc32c6294a
6
+ metadata.gz: b132102fd9d06dfcb2eda58fa76801bbf26a0f7990e0b4df2a5a035aa8b276d9e278b56d22a0e844ad4ee4da3b2af10481bce5cb7e0673a7c14d00b42bfb9dd6
7
+ data.tar.gz: f78d11cfa53c5502e818f9c6e03c1f324a489df3da064bda01b7df58e3e6feabce933cc03743e5fdaa7013c3d6772805e4941c56761a06c8d1111ea99f02d389
@@ -1,18 +1,25 @@
1
1
  require 'yaml'
2
2
  # Run only in a Rails environment
3
-
3
+ # Run this task to generate a base config file for your Rails application.
4
+ # After running it, it is up to you to whitelist specific entries in the generated list.
4
5
  namespace :toku do
5
6
  desc 'Generate base config file from your base database schema in Rails'
6
7
  task genesis_rails: :environment do
7
- File.open(Rails.root.join('tmp').join('config.yml'), 'w') do |f|
8
+ path = Rails.root.join('tmp').join('config.yml')
9
+ File.open(path, 'w') do |f|
8
10
  hash = {}
9
11
  ActiveRecord::Base.connection.tables.each do |table|
10
12
  hash[table] = { 'columns' => {}, 'rows' => [] }
11
13
  ActiveRecord::Base.connection.columns(table).map(&:name).each do |column|
12
- hash[table]['columns'][column] = ['drop']
14
+ if column.ends_with?("_at") || column.ends_with?("id") || column == 'id'
15
+ hash[table]['columns'][column] = ['none']
16
+ else
17
+ hash[table]['columns'][column] = ['obfuscate']
18
+ end
13
19
  end
14
20
  end
15
21
  f.write(hash.to_yaml)
16
22
  end
23
+ puts "Pickup your config file in #{path}"
17
24
  end
18
25
  end
@@ -1,5 +1,8 @@
1
1
  require "toku/version"
2
2
  require "uri"
3
+ require 'sequel'
4
+ require 'csv'
5
+ require 'concurrent'
3
6
 
4
7
  Dir[File.dirname(__FILE__) + "/toku/**/*.rb"].each { |file| require file }
5
8
 
@@ -13,7 +16,9 @@ module Toku
13
16
  none: Toku::ColumnFilter::Passthrough,
14
17
  faker_last_name: Toku::ColumnFilter::FakerLastName,
15
18
  faker_first_name: Toku::ColumnFilter::FakerFirstName,
16
- faker_email: Toku::ColumnFilter::FakerEmail
19
+ faker_email: Toku::ColumnFilter::FakerEmail,
20
+ obfuscate: Toku::ColumnFilter::Obfuscate,
21
+ nullify: Toku::ColumnFilter::Nullify
17
22
  }
18
23
 
19
24
  # A few default row filters mappings
@@ -21,11 +26,14 @@ module Toku
21
26
  drop: Toku::RowFilter::Drop
22
27
  }
23
28
 
24
- SCHEMA_DUMP_PATH = "/tmp/toku_source_schema_dump.sql"
29
+ THREADPOOL_SIZE = (Concurrent.processor_count * 2).freeze
30
+
31
+ SCHEMA_DUMP_PATH = "tmp/toku_source_schema_dump.sql"
25
32
 
26
33
  # @param [String] config_file_path path of config file
27
34
  def initialize(config_file_path, column_filters = {}, row_filters = {})
28
- @config = YAML.load(File.read(config_file_path))
35
+ @config = YAML.load(ERB.new(File.read(config_file_path)).result)
36
+ @threadpool = Concurrent::FixedThreadPool.new(THREADPOOL_SIZE)
29
37
  self.column_filters = column_filters.merge(COLUMN_FILTER_MAP)
30
38
  self.row_filters = row_filters.merge(ROW_FILTER_MAP)
31
39
  Sequel::Database.extension(:pg_streaming)
@@ -35,76 +43,99 @@ module Toku
35
43
  # @param uri_db_destination [String] URI of the destination DB
36
44
  # @return [void]
37
45
  def run(uri_db_source, uri_db_destination)
46
+ begin_time_stamp = Time.now
47
+ @global_count = 0
38
48
  source_db = Sequel.connect(uri_db_source)
39
- dump_schema(URI(uri_db_source).path.tr("/", ""))
49
+ dump_schema(uri_db_source)
40
50
  parsed_destination_uri = URI(uri_db_destination)
41
51
  destination_db_name = parsed_destination_uri.path.tr("/", "")
42
- destination_host = Sequel.connect("postgres://#{parsed_destination_uri.user}@#{parsed_destination_uri.host}:#{parsed_destination_uri.port}/template1")
43
- destination_host.run("DROP DATABASE IF EXISTS #{destination_db_name}")
44
- destination_host.run("CREATE DATABASE #{destination_db_name}")
52
+ destination_connection =
53
+ Sequel.connect("postgres://#{parsed_destination_uri.user}:#{parsed_destination_uri.password}@#{parsed_destination_uri.host}:#{parsed_destination_uri.port || 5432}/template1")
54
+ destination_connection.run("DROP DATABASE IF EXISTS #{destination_db_name}")
55
+ destination_connection.run("CREATE DATABASE #{destination_db_name}")
56
+ destination_connection.disconnect
45
57
  destination_db = Sequel.connect(uri_db_destination)
46
58
  destination_db.run(File.read(SCHEMA_DUMP_PATH))
59
+ destination_pool = Sequel::ThreadedConnectionPool.new(destination_db)
60
+ source_pool = Sequel::ThreadedConnectionPool.new(source_db)
47
61
 
48
- raise Toku::SchemaMismatchError unless source_schema_included?(source_db, destination_db)
49
-
50
- source_db.tables.each do |table|
51
- if !row_filters?(table) && @config[table.to_s]['columns'].count < source_db.from(table).columns.count
62
+ source_db.tables.each do |t|
63
+ if !row_filters?(t) && @config[t.to_s]['columns'].count < source_db.from(t).columns.count
52
64
  raise Toku::ColumnFilterMissingError
53
65
  end
54
- row_enumerator = source_db[table].stream.lazy
55
-
56
- @config[table.to_s]['rows'].each do |f|
57
- if f.is_a? String
58
- row_filter = self.row_filters[f.to_sym].new({})
59
- elsif f.is_a? Hash
60
- row_filter = self.row_filters[f.keys.first.to_sym].new(f.values.first)
66
+ @threadpool.post do
67
+ destination_pool.hold do |destination_connection|
68
+ source_pool.hold do |source_connection|
69
+ process_table(t, source_connection.instance_variable_get(:@db), destination_connection.instance_variable_get(:@db))
70
+ end
61
71
  end
62
-
63
- row_enumerator = row_filter.call(row_enumerator)
64
72
  end
65
-
66
- row_enumerator = row_enumerator.map { |row| transform(row, table) }
67
- destination_db.copy_into(table, data: row_enumerator, format: :csv)
68
- count = destination_db[table].count
69
- puts "Toku: copied #{count} objects into #{table} #{count != 0 ? ':)' : ':|'}"
70
73
  end
71
74
 
75
+ @threadpool.shutdown
76
+ @threadpool.wait_for_termination
72
77
  source_db.disconnect
73
78
  destination_db.disconnect
74
79
  FileUtils.rm(SCHEMA_DUMP_PATH)
80
+ puts "Toku: copied #{@global_count} elements in total and that took #{(Time.now - begin_time_stamp).round(2)} seconds with #{THREADPOOL_SIZE} green threads"
75
81
  nil
76
82
  end
77
83
 
78
84
  # @param name [Symbol]
79
85
  # @param row [Hash]
80
86
  # @return [String]
81
- def transform(row, name)
82
- row.map do |k, v|
83
- filter_params = @config[name.to_s]['columns'][k.to_s].first
84
- if filter_params.is_a? Hash
85
- column_filter = self.column_filters[filter_params.keys.first.to_sym].new(
86
- v,
87
- filter_params.values.first
88
- )
89
- elsif filter_params.is_a? String
90
- column_filter = self.column_filters[filter_params.to_sym].new(v, {})
87
+ def transform(row, table_name)
88
+ row.map do |row_key, row_value|
89
+ @config[table_name.to_s]['columns'][row_key.to_s].inject(row_value) do |result, filter|
90
+ if filter.is_a? Hash
91
+ filter_class(column_filters, filter.keys.first.to_sym).new(
92
+ result,
93
+ filter.values.first
94
+ ).call
95
+ elsif filter.is_a? String
96
+ filter_class(column_filters, filter.to_sym).new(result, {}).call
97
+ end
91
98
  end
92
- column_filter.call
93
- end.join(",") + "\n"
99
+ end.to_csv
94
100
  end
95
101
 
96
- # Is the source database schema a subset of the destination database schema?
97
- # @param source_db [String] URI of source database
98
- # @param destination_db [String] URI of destination database
99
- # @return [Boolean]
100
- def source_schema_included?(source_db, destination_db)
101
- source_db.tables.all? do |table|
102
- source_db.schema(table) == destination_db.schema(table)
102
+ def process_table(table, source_connection, destination_connection)
103
+ row_enumerator = source_connection[table].stream.lazy
104
+ @config[table.to_s]['rows'].each do |f|
105
+ row_filter = if f.is_a? String
106
+ self.row_filters[f.to_sym].new({})
107
+ elsif f.is_a? Hash
108
+ self.row_filters[f.keys.first.to_sym].new(f.values.first)
109
+ end
110
+ row_enumerator = row_filter.call(row_enumerator)
103
111
  end
112
+
113
+ destination_connection.run("ALTER TABLE #{table} DISABLE TRIGGER ALL;")
114
+ destination_connection.copy_into(table, data: row_enumerator.map { |row| transform(row, table) }, format: :csv)
115
+ destination_connection.run("ALTER TABLE #{table} ENABLE TRIGGER ALL;")
116
+ count = destination_connection[table].count
117
+ @global_count += count
118
+ puts "Toku: copied #{count} objects into #{table} #{count != 0 ? ':)' : ':|'}"
119
+ end
120
+
121
+ # @param uri [String]
122
+ # @return [void]
123
+ def dump_schema(uri)
124
+ FileUtils::mkdir_p 'tmp'
125
+ host = URI(uri).host
126
+ password = URI(uri).password || ENV["PGPASSWORD"]
127
+ user = URI(uri).user
128
+ password = URI(uri).password
129
+ port = URI(uri).port || 5432
130
+ db_name = URI(uri).path.tr("/", "")
131
+ raise "pg_dump schema dump failed" unless system(
132
+ "PGPASSWORD=#{password} pg_dump -s -h #{host} -p #{port} -U #{user} #{db_name} > #{SCHEMA_DUMP_PATH}"
133
+ )
104
134
  end
105
135
 
106
- def dump_schema(db_name)
107
- raise "Dump failed" unless system("pg_dump", "-s", "-x", "-O", "-f", SCHEMA_DUMP_PATH, "#{db_name}")
136
+ def filter_class(type, symbol)
137
+ raise "Please provide a filter for #{symbol}" if type[symbol].nil?
138
+ type[symbol]
108
139
  end
109
140
 
110
141
  # Are there row filters specified for this table?
@@ -0,0 +1,9 @@
1
+ module Toku
2
+ class ColumnFilter
3
+ class Nullify < Toku::ColumnFilter
4
+ def initialize(value, options)
5
+ @value = nil
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ module Toku
2
+ class ColumnFilter
3
+ class Obfuscate < Toku::ColumnFilter
4
+ def initialize(value, options)
5
+ @value = SecureRandom.hex(10)
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,3 +1,3 @@
1
1
  module Toku
2
- VERSION = "0.1.0.4.3"
2
+ VERSION = "0.1.0.4.4"
3
3
  end
@@ -24,6 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.add_dependency 'sequel'
25
25
  spec.add_dependency 'sequel_pg'
26
26
  spec.add_dependency 'faker'
27
+ spec.add_dependency 'concurrent-ruby'
27
28
 
28
29
  spec.add_development_dependency "bundler", "~> 1.13"
29
30
  spec.add_development_dependency "rake", "~> 10.0"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: toku
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.4.3
4
+ version: 0.1.0.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - PSKL
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2018-02-20 00:00:00.000000000 Z
12
+ date: 2018-02-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: sequel
@@ -53,6 +53,20 @@ dependencies:
53
53
  - - ">="
54
54
  - !ruby/object:Gem::Version
55
55
  version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: concurrent-ruby
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
56
70
  - !ruby/object:Gem::Dependency
57
71
  name: bundler
58
72
  requirement: !ruby/object:Gem::Requirement
@@ -106,6 +120,8 @@ files:
106
120
  - lib/toku/filters/column/faker_email.rb
107
121
  - lib/toku/filters/column/faker_first_name.rb
108
122
  - lib/toku/filters/column/faker_last_name.rb
123
+ - lib/toku/filters/column/nullify.rb
124
+ - lib/toku/filters/column/obfuscate.rb
109
125
  - lib/toku/filters/column/passthrough.rb
110
126
  - lib/toku/filters/column_filter.rb
111
127
  - lib/toku/filters/row/drop.rb