pg_dump_anonymize 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/pg_dump_anonymize/definition.rb +39 -3
- data/lib/pg_dump_anonymize/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee33e3ece88467be6ffb983cb2ff09f4f4097055540ec045dbc5257d6d44c442
|
4
|
+
data.tar.gz: b2d7472cd7b919a2c0b277a77d455a10f904d22cc8a9f5ebda44b7d2aa81553e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8867ff2d50cfff7b2aaf4bca2ab1235727c5ad47f5f91bb3e1482680080de5e7b61f462eaaf7f27d8d86c18b12af410bef175e4e029b8294dc2399a5aff2d59
|
7
|
+
data.tar.gz: ab07bff2fa4502d8a5c6805ae7e1c2162d68cd927cccc51542ea91088974e8625a65bd1c81a90b179f60f1280c52d5458509de132428df8c6d559b6243b9aea9
|
data/Gemfile.lock
CHANGED
data/lib/pg_dump_anonymize/definition.rb
CHANGED
@@ -13,6 +13,10 @@ module PgDumpAnonymize
|
|
13
13
|
if @current_table
|
14
14
|
if end_stdin?(line)
|
15
15
|
clear_current_table
|
16
|
+
elsif skip?(line)
|
17
|
+
# do nothing
|
18
|
+
elsif delete?(line)
|
19
|
+
line = ''
|
16
20
|
else
|
17
21
|
line = anonymize_line(line)
|
18
22
|
end
|
@@ -27,13 +31,16 @@ module PgDumpAnonymize
|
|
27
31
|
# This assumes the line is a tab delimited data line
|
28
32
|
def anonymize_line(line)
|
29
33
|
values = line.split("\t")
|
30
|
-
row_context = {} # used to share state for a row
|
34
|
+
row_context = { row: row_to_hash(values) } # used to share state for a row
|
31
35
|
@positional_substitutions.each do |index, val_def|
|
32
36
|
values[index] = if val_def.is_a?(Proc)
|
33
37
|
val_def.call(*[values[index], row_context].slice(0, val_def.arity))
|
34
38
|
else
|
35
39
|
val_def
|
36
40
|
end
|
41
|
+
|
42
|
+
# Postgres represents nil/null as '\N' in SQL dumps
|
43
|
+
values[index] = '\N' if values[index].nil?
|
37
44
|
end
|
38
45
|
values.join("\t")
|
39
46
|
end
|
@@ -52,10 +59,10 @@ module PgDumpAnonymize
|
|
52
59
|
# Finds the positional range of the attribute to be replaced
|
53
60
|
# returns an array of arrays. The inner array is [<field_index>, <anonymous_value>]
|
54
61
|
def find_positions(fields_str, rules)
|
55
|
-
fields = fields_str.gsub('"', '').split(', ')
|
62
|
+
@fields = fields_str.gsub('"', '').split(', ')
|
56
63
|
|
57
64
|
rules.map do |target_field, val|
|
58
|
-
index = fields.index(target_field.to_s)
|
65
|
+
index = @fields.index(target_field.to_s)
|
59
66
|
[index, val] if index
|
60
67
|
end.compact
|
61
68
|
end
|
@@ -75,7 +82,36 @@ module PgDumpAnonymize
|
|
75
82
|
|
76
83
|
def clear_current_table
|
77
84
|
@current_table = nil
|
85
|
+
@fields = nil
|
78
86
|
@positional_substitutions = nil
|
79
87
|
end
|
88
|
+
|
89
|
+
def skip?(row)
|
90
|
+
if (skip_if = @attribute_rules.dig(@current_table, :_skip_if))
|
91
|
+
!!skip_if.call(row_to_hash(row))
|
92
|
+
else
|
93
|
+
false
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def delete?(row)
|
98
|
+
if (delete_if = @attribute_rules.dig(@current_table, :_delete_if))
|
99
|
+
!!delete_if.call(row_to_hash(row))
|
100
|
+
else
|
101
|
+
false
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def row_to_hash(row)
|
106
|
+
return nil unless @fields
|
107
|
+
|
108
|
+
values = row.is_a?(String) ? row.split("\t") : row
|
109
|
+
|
110
|
+
begin
|
111
|
+
Hash[*@fields.zip(values).flatten]
|
112
|
+
rescue StandardError => e
|
113
|
+
raise "#{e.message}, row_to_hash error encountered: current_table: #{@current_table} -- fields(#{@fields&.length}): #{@fields} -- values(#{values&.length}): #{values}"
|
114
|
+
end
|
115
|
+
end
|
80
116
|
end
|
81
117
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_dump_anonymize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sean McCleary
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Given the default pg_dump text SQL dump, this can take a simple definition
|
14
14
|
of tables and fields to anonymize and efficiently anonymize the dump.
|