toku 0.1.0.4.1 → 0.1.0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +22 -1
- data/lib/toku.rb +24 -9
- data/lib/toku/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 25837d845a4963a5f5cafaa77a28acfdfd0b1080
|
4
|
+
data.tar.gz: 0a5eed937d7ebf8289cf71a86edc92e349a5059a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b66bc251a30364fe0d876130271503f4daa7819d8eb7d693cf89926474aae6ab990fad0ac445a970cbdb1ccff69238133a0f7c957585c7bc4e696d180cb5fbf6
|
7
|
+
data.tar.gz: b2b95c8a083fe1cfb44d963a49b4d0763397f37b286098f8294502a3198996b028be87148d17823857f44df374ad68567990666798ca4e4d308564775b60a7d7
|
data/README.md
CHANGED
@@ -34,10 +34,31 @@ Users can define custom filters by implementing a `Toku::ColumnFilter` subclass
|
|
34
34
|
|
35
35
|
```ruby
|
36
36
|
module Toku
|
37
|
-
class
|
37
|
+
class ColumnFilter
|
38
|
+
# @param value [Object] initial value for the column
|
39
|
+
# @param options [Hash{String => Object}] arguments passed to the filter
|
38
40
|
def initialize(value, options)
|
41
|
+
@value = value
|
39
42
|
end
|
40
43
|
|
44
|
+
def call
|
45
|
+
@value
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
```
|
50
|
+
|
51
|
+
or a `Toku::RowFilter` like so:
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
module Toku
|
55
|
+
class RowFilter
|
56
|
+
# @param options [Hash{String => Object}] arguments passed to the filter
|
57
|
+
def initialize(options)
|
58
|
+
end
|
59
|
+
|
60
|
+
# @param [LazyEnumerator] stream
|
61
|
+
# @return [LazyEnumerator] stream
|
41
62
|
def call(_)
|
42
63
|
_
|
43
64
|
end
|
data/lib/toku.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
require "toku/version"
|
2
|
+
require "uri"
|
3
|
+
|
2
4
|
Dir[File.dirname(__FILE__) + "/toku/**/*.rb"].each { |file| require file }
|
3
5
|
|
4
6
|
module Toku
|
5
7
|
class Anonymizer
|
6
|
-
|
7
8
|
attr_accessor :column_filters
|
8
9
|
attr_accessor :row_filters
|
9
10
|
|
@@ -20,31 +21,37 @@ module Toku
|
|
20
21
|
drop: Toku::RowFilter::Drop
|
21
22
|
}
|
22
23
|
|
24
|
+
SCHEMA_DUMP_PATH = "/tmp/toku_source_schema_dump.sql"
|
25
|
+
|
23
26
|
# @param [String] config_file_path path of config file
|
24
27
|
def initialize(config_file_path, column_filters = {}, row_filters = {})
|
25
28
|
@config = YAML.load(File.read(config_file_path))
|
26
29
|
self.column_filters = column_filters.merge(COLUMN_FILTER_MAP)
|
27
30
|
self.row_filters = row_filters.merge(ROW_FILTER_MAP)
|
31
|
+
Sequel::Database.extension(:pg_streaming)
|
28
32
|
end
|
29
33
|
|
30
|
-
# @param [String] uri_db_source URI of the DB to be anonimized
|
31
|
-
# @param [String]
|
34
|
+
# @param uri_db_source [String] URI of the DB to be anonymized
|
35
|
+
# @param uri_db_destination [String] URI of the destination DB
|
32
36
|
# @return [void]
|
33
37
|
def run(uri_db_source, uri_db_destination)
|
34
|
-
|
35
38
|
source_db = Sequel.connect(uri_db_source)
|
39
|
+
dump_schema(URI(uri_db_source).path.tr("/", ""))
|
40
|
+
parsed_destination_uri = URI(uri_db_destination)
|
41
|
+
destination_db_name = parsed_destination_uri.path.tr("/", "")
|
42
|
+
destination_host = Sequel.connect("postgres://#{parsed_destination_uri.user}@#{parsed_destination_uri.host}:#{parsed_destination_uri.port}/template1")
|
43
|
+
destination_host.run("DROP DATABASE IF EXISTS #{destination_db_name}")
|
44
|
+
destination_host.run("CREATE DATABASE #{destination_db_name}")
|
36
45
|
destination_db = Sequel.connect(uri_db_destination)
|
46
|
+
destination_db.run(File.read(SCHEMA_DUMP_PATH))
|
37
47
|
|
38
48
|
raise Toku::SchemaMismatchError unless source_schema_included?(source_db, destination_db)
|
39
49
|
|
40
|
-
source_db.extension(:pg_streaming)
|
41
|
-
|
42
50
|
source_db.tables.each do |table|
|
43
51
|
if !row_filters?(table) && @config[table.to_s]['columns'].count < source_db.from(table).columns.count
|
44
52
|
raise Toku::ColumnFilterMissingError
|
45
53
|
end
|
46
54
|
row_enumerator = source_db[table].stream.lazy
|
47
|
-
destination_db[table].truncate
|
48
55
|
|
49
56
|
@config[table.to_s]['rows'].each do |f|
|
50
57
|
if f.is_a? String
|
@@ -61,11 +68,15 @@ module Toku
|
|
61
68
|
count = destination_db[table].count
|
62
69
|
puts "Toku: copied #{count} objects into #{table} #{count != 0 ? ':)' : ':|'}"
|
63
70
|
end
|
71
|
+
|
72
|
+
source_db.disconnect
|
73
|
+
destination_db.disconnect
|
74
|
+
FileUtils.rm(SCHEMA_DUMP_PATH)
|
64
75
|
nil
|
65
76
|
end
|
66
77
|
|
67
|
-
# @param [Symbol]
|
68
|
-
# @param [Hash]
|
78
|
+
# @param name [Symbol]
|
79
|
+
# @param row [Hash]
|
69
80
|
# @return [String]
|
70
81
|
def transform(row, name)
|
71
82
|
row.map do |k, v|
|
@@ -92,6 +103,10 @@ module Toku
|
|
92
103
|
end
|
93
104
|
end
|
94
105
|
|
106
|
+
def dump_schema(db_name)
|
107
|
+
raise "Dump failed" unless system("pg_dump", "-s", "-x", "-O", "-f", SCHEMA_DUMP_PATH, "#{db_name}")
|
108
|
+
end
|
109
|
+
|
95
110
|
# Are there row filters specified for this table?
|
96
111
|
# @param table [Symbol]
|
97
112
|
# @return [Boolean]
|
data/lib/toku/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: toku
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0.4.1
|
4
|
+
version: 0.1.0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- PSKL
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-
|
12
|
+
date: 2018-02-16 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|