pg_dump_anonymize 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/pg_dump_anonymize/definition.rb +39 -3
- data/lib/pg_dump_anonymize/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee33e3ece88467be6ffb983cb2ff09f4f4097055540ec045dbc5257d6d44c442
|
4
|
+
data.tar.gz: b2d7472cd7b919a2c0b277a77d455a10f904d22cc8a9f5ebda44b7d2aa81553e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8867ff2d50cfff7b2aaf4bca2ab1235727c5ad47f5f91bb3e1482680080de5e7b61f462eaaf7f27d8d86c18b12af410bef175e4e029b8294dc2399a5aff2d59
|
7
|
+
data.tar.gz: ab07bff2fa4502d8a5c6805ae7e1c2162d68cd927cccc51542ea91088974e8625a65bd1c81a90b179f60f1280c52d5458509de132428df8c6d559b6243b9aea9
|
data/lib/pg_dump_anonymize/definition.rb
CHANGED
@@ -13,6 +13,10 @@ module PgDumpAnonymize
|
|
13
13
|
if @current_table
|
14
14
|
if end_stdin?(line)
|
15
15
|
clear_current_table
|
16
|
+
elsif skip?(line)
|
17
|
+
# do nothing
|
18
|
+
elsif delete?(line)
|
19
|
+
line = ''
|
16
20
|
else
|
17
21
|
line = anonymize_line(line)
|
18
22
|
end
|
@@ -27,13 +31,16 @@ module PgDumpAnonymize
|
|
27
31
|
# This assumes the line is a tab delimited data line
|
28
32
|
def anonymize_line(line)
|
29
33
|
values = line.split("\t")
|
30
|
-
row_context = {} # used to share state for a row
|
34
|
+
row_context = { row: row_to_hash(values) } # used to share state for a row
|
31
35
|
@positional_substitutions.each do |index, val_def|
|
32
36
|
values[index] = if val_def.is_a?(Proc)
|
33
37
|
val_def.call(*[values[index], row_context].slice(0, val_def.arity))
|
34
38
|
else
|
35
39
|
val_def
|
36
40
|
end
|
41
|
+
|
42
|
+
# Postgres represents nil/null as '\N' in SQL dumps
|
43
|
+
values[index] = '\N' if values[index].nil?
|
37
44
|
end
|
38
45
|
values.join("\t")
|
39
46
|
end
|
@@ -52,10 +59,10 @@ module PgDumpAnonymize
|
|
52
59
|
# Finds the positional range of the attribute to be replaced
|
53
60
|
# returns an array of arrays. The inner array is [<field_index>, <anonymous_value>]
|
54
61
|
def find_positions(fields_str, rules)
|
55
|
-
fields = fields_str.gsub('"', '').split(', ')
|
62
|
+
@fields = fields_str.gsub('"', '').split(', ')
|
56
63
|
|
57
64
|
rules.map do |target_field, val|
|
58
|
-
index = fields.index(target_field.to_s)
|
65
|
+
index = @fields.index(target_field.to_s)
|
59
66
|
[index, val] if index
|
60
67
|
end.compact
|
61
68
|
end
|
@@ -75,7 +82,36 @@ module PgDumpAnonymize
|
|
75
82
|
|
76
83
|
def clear_current_table
|
77
84
|
@current_table = nil
|
85
|
+
@fields = nil
|
78
86
|
@positional_substitutions = nil
|
79
87
|
end
|
88
|
+
|
89
|
+
def skip?(row)
|
90
|
+
if (skip_if = @attribute_rules.dig(@current_table, :_skip_if))
|
91
|
+
!!skip_if.call(row_to_hash(row))
|
92
|
+
else
|
93
|
+
false
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def delete?(row)
|
98
|
+
if (delete_if = @attribute_rules.dig(@current_table, :_delete_if))
|
99
|
+
!!delete_if.call(row_to_hash(row))
|
100
|
+
else
|
101
|
+
false
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def row_to_hash(row)
|
106
|
+
return nil unless @fields
|
107
|
+
|
108
|
+
values = row.is_a?(String) ? row.split("\t") : row
|
109
|
+
|
110
|
+
begin
|
111
|
+
Hash[*@fields.zip(values).flatten]
|
112
|
+
rescue StandardError => e
|
113
|
+
raise "#{e.message}, row_to_hash error encountered: current_table: #{@current_table} -- fields(#{@fields&.length}): #{@fields} -- values(#{values&.length}): #{values}"
|
114
|
+
end
|
115
|
+
end
|
80
116
|
end
|
81
117
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_dump_anonymize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sean McCleary
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Given the default pg_dump text SQL dump, this can take a simple definition
|
14
14
|
of tables and fields to anonymize and efficiently anonymize the dump.
|