dump_cleaner 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/.rubocop.yml +25 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +295 -0
- data/Rakefile +8 -0
- data/doc/workflow_steps.md +1400 -0
- data/dump_cleaner.gemspec +38 -0
- data/exe/dump_cleaner +7 -0
- data/lib/dump_cleaner/cleaners/base_cleaner.rb +32 -0
- data/lib/dump_cleaner/cleaners/mysql_shell_dump_cleaner.rb +47 -0
- data/lib/dump_cleaner/cleaners/mysql_shell_dump_helpers.rb +11 -0
- data/lib/dump_cleaner/cleaners/mysql_shell_table_cleaner.rb +184 -0
- data/lib/dump_cleaner/cleanup/bytesize_helpers.rb +39 -0
- data/lib/dump_cleaner/cleanup/cleaning.rb +69 -0
- data/lib/dump_cleaner/cleanup/cleaning_steps/add_repetition_suffix.rb +23 -0
- data/lib/dump_cleaner/cleanup/cleaning_steps/base.rb +33 -0
- data/lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string.rb +20 -0
- data/lib/dump_cleaner/cleanup/cleaning_steps/generate_random_string.rb +37 -0
- data/lib/dump_cleaner/cleanup/cleaning_steps/inspect_context.rb +16 -0
- data/lib/dump_cleaner/cleanup/cleaning_steps/randomize_email.rb +78 -0
- data/lib/dump_cleaner/cleanup/cleaning_steps/randomize_formatted_number.rb +63 -0
- data/lib/dump_cleaner/cleanup/cleaning_steps/randomize_number.rb +29 -0
- data/lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_bytesize.rb +17 -0
- data/lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_pattern.rb +20 -0
- data/lib/dump_cleaner/cleanup/cleaning_steps/take_sample.rb +28 -0
- data/lib/dump_cleaner/cleanup/data_source.rb +19 -0
- data/lib/dump_cleaner/cleanup/data_source_steps/base.rb +26 -0
- data/lib/dump_cleaner/cleanup/data_source_steps/group_by_bytesize.rb +37 -0
- data/lib/dump_cleaner/cleanup/data_source_steps/inspect_context.rb +16 -0
- data/lib/dump_cleaner/cleanup/data_source_steps/load_yaml_file.rb +24 -0
- data/lib/dump_cleaner/cleanup/data_source_steps/remove_accents.rb +29 -0
- data/lib/dump_cleaner/cleanup/inspection.rb +37 -0
- data/lib/dump_cleaner/cleanup/step_context.rb +46 -0
- data/lib/dump_cleaner/cleanup/uniqueness.rb +66 -0
- data/lib/dump_cleaner/cleanup/workflow.rb +38 -0
- data/lib/dump_cleaner/conditions.rb +42 -0
- data/lib/dump_cleaner/config.rb +109 -0
- data/lib/dump_cleaner/log.rb +42 -0
- data/lib/dump_cleaner/options.rb +46 -0
- data/lib/dump_cleaner/processor.rb +37 -0
- data/lib/dump_cleaner/version.rb +5 -0
- data/lib/dump_cleaner.rb +10 -0
- metadata +105 -0
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true

require "logger"
require "singleton"

module DumpCleaner
  # Process-wide logger for DumpCleaner.
  #
  # A Singleton subclass of the standard library Logger that writes to
  # $stdout at INFO level, formatting each entry as
  # "<datetime> <SEVERITY>: <message>".
  #
  # The class-level shortcuts (Log.debug, Log.info, ...) forward to the single
  # shared instance. Each accepts either a block that lazily builds the message
  # (the block only runs when the severity is enabled) or a plain message
  # passed as a positional argument:
  #
  #   Log.info { "Expensive #{interpolation}" }
  #   Log.warn("Something looks off")
  class Log < ::Logger
    include Singleton

    # NOTE: @logger is never assigned inside this class, so this reader
    # returns nil unless something else sets the instance variable. It is kept
    # only so that existing callers of `#logger` do not break.
    attr_reader :logger

    # Builds the logger bound to the current $stdout, applies the default
    # level and installs the one-line formatter.
    def initialize
      super($stdout)

      init_log_level
      self.formatter = ->(severity, datetime, _progname, msg) { "#{datetime} #{severity}: #{msg}\n" }
    end

    # Resets the log level to the default (INFO).
    def init_log_level
      self.level = Logger::INFO
    end

    # Logs a DEBUG entry through the shared instance.
    #
    # @param progname [String, nil] the message when no block is given
    # @yieldreturn [String] the lazily built message
    def self.debug(progname = nil, &block)
      instance.debug(progname, &block)
    end

    # Logs an INFO entry through the shared instance.
    #
    # @param progname [String, nil] the message when no block is given
    # @yieldreturn [String] the lazily built message
    def self.info(progname = nil, &block)
      instance.info(progname, &block)
    end

    # Logs a WARN entry through the shared instance.
    #
    # @param progname [String, nil] the message when no block is given
    # @yieldreturn [String] the lazily built message
    def self.warn(progname = nil, &block)
      instance.warn(progname, &block)
    end

    # Logs an ERROR entry through the shared instance.
    #
    # @param progname [String, nil] the message when no block is given
    # @yieldreturn [String] the lazily built message
    def self.error(progname = nil, &block)
      instance.error(progname, &block)
    end

    # Logs a FATAL entry through the shared instance.
    #
    # @param progname [String, nil] the message when no block is given
    # @yieldreturn [String] the lazily built message
    def self.fatal(progname = nil, &block)
      instance.fatal(progname, &block)
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DumpCleaner
  # Command-line options of the dump_cleaner executable.
  #
  # Starts from DEFAULT_OPTIONS, overrides them with the switches found in the
  # given argument vector and finally checks that both the source and the
  # destination dump paths were supplied.
  class Options
    require "optparse"

    DEFAULT_OPTIONS = {
      config_file: "config/dump_cleaner.yml"
    }.freeze

    attr_accessor :source_dump_path, :destination_dump_path, :config_file

    # @param argv [Array<String>] command-line arguments; recognised switches
    #   are removed from this array while parsing
    # @raise [ArgumentError] when the source or the destination path is missing
    def initialize(argv)
      DEFAULT_OPTIONS.each_pair { |name, value| public_send(:"#{name}=", value) }
      parse(argv)
      validate
    end

    private

    # Consumes the recognised switches from argv and stores their values.
    def parse(argv)
      option_parser.parse!(argv)
    end

    # Builds the OptionParser whose handlers write into this object.
    def option_parser
      parser = OptionParser.new
      parser.banner = "Usage: dump_cleaner -f source_dump -t cleaned_dump [options]"

      parser.on("-f", "--from=SOURCE_DUMP_PATH",
                "File or directory of the original (source) dump") { |path| self.source_dump_path = path }
      parser.on("-t", "--to=DESTINATION_DUMP_PATH",
                "File or directory of the cleaned (destination) dump") { |path| self.destination_dump_path = path }
      parser.on("-c", "--config=CONFIG_FILE", "Configuration file path") { |path| self.config_file = path }

      parser
    end

    # Both dump paths are mandatory; anything else is a usage error.
    def validate
      return if source_dump_path && destination_dump_path

      # The literal spans two lines; gsub collapses the line break and the
      # indentation into single spaces.
      raise ArgumentError, "Missing source or destination dump file or directory,
                            please use -f and -t options. Use -h for help.".gsub(/\s+/, " ")
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DumpCleaner
  # Runs a complete cleanup: picks the cleaner matching the configured dump
  # format, drives it through its pre_cleanup / clean / post_cleanup phases and
  # logs how long the whole run took.
  class Processor
    attr_reader :options

    # @param options [#config_file] parsed command-line options; also passed
    #   on to the cleaner as-is
    def initialize(options)
      @options = options
    end

    # Executes the cleanup.
    #
    # @raise [Config::ConfigurationError] when the configured dump format has
    #   no matching cleaner
    def run
      # Use the monotonic clock for the duration: unlike Time.now it cannot
      # jump when the system clock is adjusted, so the reported time is never
      # negative or inflated.
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      # Resolved before logging so an unsupported format fails fast.
      selected_class = cleaner_class

      Log.debug { "Starting cleanup with #{selected_class}…" }
      cleaner = selected_class.new(config:, options:)
      cleaner.pre_cleanup
      cleaner.clean
      cleaner.post_cleanup

      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
      Log.info { "Finished in #{elapsed.div(60)}m #{(elapsed % 60).to_i}s." }
    end

    private

    # Maps the configured dump format to the class able to clean it.
    def cleaner_class
      case config.dump_format
      when "mysql_shell"
        Cleaners::MysqlShellDumpCleaner
      else
        raise Config::ConfigurationError, "Unsupported dump format #{config.dump_format}"
      end
    end

    # Configuration built from the file named in the options; created on first
    # use and memoized.
    def config
      @config ||= Config.new(options.config_file)
    end
  end
end
|
data/lib/dump_cleaner.rb
ADDED
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dump_cleaner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matouš Borák
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-06-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: zeitwerk
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.6'
|
27
|
+
description: Deterministically anonymizes data in logical database dumps. Useful for
|
28
|
+
importing (anonymized) production data into development environments.
|
29
|
+
email:
|
30
|
+
- matous.borak@nejremeslnici.cz
|
31
|
+
executables:
|
32
|
+
- dump_cleaner
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- ".rspec"
|
37
|
+
- ".rubocop.yml"
|
38
|
+
- CHANGELOG.md
|
39
|
+
- LICENSE.txt
|
40
|
+
- README.md
|
41
|
+
- Rakefile
|
42
|
+
- doc/workflow_steps.md
|
43
|
+
- dump_cleaner.gemspec
|
44
|
+
- exe/dump_cleaner
|
45
|
+
- lib/dump_cleaner.rb
|
46
|
+
- lib/dump_cleaner/cleaners/base_cleaner.rb
|
47
|
+
- lib/dump_cleaner/cleaners/mysql_shell_dump_cleaner.rb
|
48
|
+
- lib/dump_cleaner/cleaners/mysql_shell_dump_helpers.rb
|
49
|
+
- lib/dump_cleaner/cleaners/mysql_shell_table_cleaner.rb
|
50
|
+
- lib/dump_cleaner/cleanup/bytesize_helpers.rb
|
51
|
+
- lib/dump_cleaner/cleanup/cleaning.rb
|
52
|
+
- lib/dump_cleaner/cleanup/cleaning_steps/add_repetition_suffix.rb
|
53
|
+
- lib/dump_cleaner/cleanup/cleaning_steps/base.rb
|
54
|
+
- lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string.rb
|
55
|
+
- lib/dump_cleaner/cleanup/cleaning_steps/generate_random_string.rb
|
56
|
+
- lib/dump_cleaner/cleanup/cleaning_steps/inspect_context.rb
|
57
|
+
- lib/dump_cleaner/cleanup/cleaning_steps/randomize_email.rb
|
58
|
+
- lib/dump_cleaner/cleanup/cleaning_steps/randomize_formatted_number.rb
|
59
|
+
- lib/dump_cleaner/cleanup/cleaning_steps/randomize_number.rb
|
60
|
+
- lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_bytesize.rb
|
61
|
+
- lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_pattern.rb
|
62
|
+
- lib/dump_cleaner/cleanup/cleaning_steps/take_sample.rb
|
63
|
+
- lib/dump_cleaner/cleanup/data_source.rb
|
64
|
+
- lib/dump_cleaner/cleanup/data_source_steps/base.rb
|
65
|
+
- lib/dump_cleaner/cleanup/data_source_steps/group_by_bytesize.rb
|
66
|
+
- lib/dump_cleaner/cleanup/data_source_steps/inspect_context.rb
|
67
|
+
- lib/dump_cleaner/cleanup/data_source_steps/load_yaml_file.rb
|
68
|
+
- lib/dump_cleaner/cleanup/data_source_steps/remove_accents.rb
|
69
|
+
- lib/dump_cleaner/cleanup/inspection.rb
|
70
|
+
- lib/dump_cleaner/cleanup/step_context.rb
|
71
|
+
- lib/dump_cleaner/cleanup/uniqueness.rb
|
72
|
+
- lib/dump_cleaner/cleanup/workflow.rb
|
73
|
+
- lib/dump_cleaner/conditions.rb
|
74
|
+
- lib/dump_cleaner/config.rb
|
75
|
+
- lib/dump_cleaner/log.rb
|
76
|
+
- lib/dump_cleaner/options.rb
|
77
|
+
- lib/dump_cleaner/processor.rb
|
78
|
+
- lib/dump_cleaner/version.rb
|
79
|
+
homepage: https://github.com/NejRemeslnici/dump-cleaner
|
80
|
+
licenses:
|
81
|
+
- MIT
|
82
|
+
metadata:
|
83
|
+
homepage_uri: https://github.com/NejRemeslnici/dump-cleaner
|
84
|
+
source_code_uri: https://github.com/NejRemeslnici/dump-cleaner
|
85
|
+
changelog_uri: https://github.com/NejRemeslnici/dump-cleaner/blob/main/CHANGELOG.md
|
86
|
+
post_install_message:
|
87
|
+
rdoc_options: []
|
88
|
+
require_paths:
|
89
|
+
- lib
|
90
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: 3.1.0
|
95
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
requirements: []
|
101
|
+
rubygems_version: 3.5.3
|
102
|
+
signing_key:
|
103
|
+
specification_version: 4
|
104
|
+
summary: Anonymizes data in logical database dumps.
|
105
|
+
test_files: []
|