dump_cleaner 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +2 -0
  3. data/.rubocop.yml +25 -0
  4. data/CHANGELOG.md +5 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +295 -0
  7. data/Rakefile +8 -0
  8. data/doc/workflow_steps.md +1400 -0
  9. data/dump_cleaner.gemspec +38 -0
  10. data/exe/dump_cleaner +7 -0
  11. data/lib/dump_cleaner/cleaners/base_cleaner.rb +32 -0
  12. data/lib/dump_cleaner/cleaners/mysql_shell_dump_cleaner.rb +47 -0
  13. data/lib/dump_cleaner/cleaners/mysql_shell_dump_helpers.rb +11 -0
  14. data/lib/dump_cleaner/cleaners/mysql_shell_table_cleaner.rb +184 -0
  15. data/lib/dump_cleaner/cleanup/bytesize_helpers.rb +39 -0
  16. data/lib/dump_cleaner/cleanup/cleaning.rb +69 -0
  17. data/lib/dump_cleaner/cleanup/cleaning_steps/add_repetition_suffix.rb +23 -0
  18. data/lib/dump_cleaner/cleanup/cleaning_steps/base.rb +33 -0
  19. data/lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string.rb +20 -0
  20. data/lib/dump_cleaner/cleanup/cleaning_steps/generate_random_string.rb +37 -0
  21. data/lib/dump_cleaner/cleanup/cleaning_steps/inspect_context.rb +16 -0
  22. data/lib/dump_cleaner/cleanup/cleaning_steps/randomize_email.rb +78 -0
  23. data/lib/dump_cleaner/cleanup/cleaning_steps/randomize_formatted_number.rb +63 -0
  24. data/lib/dump_cleaner/cleanup/cleaning_steps/randomize_number.rb +29 -0
  25. data/lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_bytesize.rb +17 -0
  26. data/lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_pattern.rb +20 -0
  27. data/lib/dump_cleaner/cleanup/cleaning_steps/take_sample.rb +28 -0
  28. data/lib/dump_cleaner/cleanup/data_source.rb +19 -0
  29. data/lib/dump_cleaner/cleanup/data_source_steps/base.rb +26 -0
  30. data/lib/dump_cleaner/cleanup/data_source_steps/group_by_bytesize.rb +37 -0
  31. data/lib/dump_cleaner/cleanup/data_source_steps/inspect_context.rb +16 -0
  32. data/lib/dump_cleaner/cleanup/data_source_steps/load_yaml_file.rb +24 -0
  33. data/lib/dump_cleaner/cleanup/data_source_steps/remove_accents.rb +29 -0
  34. data/lib/dump_cleaner/cleanup/inspection.rb +37 -0
  35. data/lib/dump_cleaner/cleanup/step_context.rb +46 -0
  36. data/lib/dump_cleaner/cleanup/uniqueness.rb +66 -0
  37. data/lib/dump_cleaner/cleanup/workflow.rb +38 -0
  38. data/lib/dump_cleaner/conditions.rb +42 -0
  39. data/lib/dump_cleaner/config.rb +109 -0
  40. data/lib/dump_cleaner/log.rb +42 -0
  41. data/lib/dump_cleaner/options.rb +46 -0
  42. data/lib/dump_cleaner/processor.rb +37 -0
  43. data/lib/dump_cleaner/version.rb +5 -0
  44. data/lib/dump_cleaner.rb +10 -0
  45. metadata +105 -0
@@ -0,0 +1,42 @@
1
module DumpCleaner
  require "logger"
  require "singleton"

  # Process-wide logger: a Singleton subclass of ::Logger that writes to
  # $stdout at INFO level, one "<datetime> <severity>: <message>" line per entry.
  #
  # The class-level shortcuts (Log.debug, Log.info, ...) forward to the
  # singleton instance, so callers never have to touch Log.instance directly.
  class Log < ::Logger
    include Singleton

    # NOTE(review): @logger is never assigned anywhere in this class, so this
    # reader returns nil. It is kept only to leave the public interface intact.
    attr_reader :logger

    # Builds the logger on top of $stdout (as it is bound at first use),
    # then applies the default level and the line formatter.
    def initialize
      super($stdout)

      init_log_level
      self.formatter = ->(severity, datetime, _progname, msg) { "#{datetime} #{severity}: #{msg}\n" }
    end

    # Sets the default severity threshold (INFO).
    def init_log_level
      self.level = Logger::INFO
    end

    # Class-level shortcuts mirroring Logger#debug / #info / #warn / #error / #fatal.
    # They accept either a lazily evaluated block (Log.info { "msg" }) or a
    # plain message argument (Log.info("msg")), exactly like the instance methods.
    class << self
      def debug(progname = nil, &block)
        instance.debug(progname, &block)
      end

      def info(progname = nil, &block)
        instance.info(progname, &block)
      end

      def warn(progname = nil, &block)
        instance.warn(progname, &block)
      end

      def error(progname = nil, &block)
        instance.error(progname, &block)
      end

      def fatal(progname = nil, &block)
        instance.fatal(progname, &block)
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module DumpCleaner
  # Command-line options of the dump_cleaner executable.
  #
  # Parses -f/--from, -t/--to and -c/--config from the given argument list and
  # raises ArgumentError when the source or destination path is missing.
  class Options
    require "optparse"

    DEFAULT_OPTIONS = {
      config_file: "config/dump_cleaner.yml"
    }.freeze

    attr_accessor :source_dump_path, :destination_dump_path, :config_file

    # @param argv [Array<String>] command-line arguments; recognized options
    #   are removed from this array (OptionParser#parse! is destructive)
    # @raise [ArgumentError] when the source or destination path is missing
    def initialize(argv)
      DEFAULT_OPTIONS.each { |name, value| public_send(:"#{name}=", value) }
      parse(argv)
      validate
    end

    private

    # Registers the supported switches and consumes them from argv.
    def parse(argv)
      option_parser = OptionParser.new do |parser|
        parser.banner = "Usage: dump_cleaner -f source_dump -t cleaned_dump [options]"

        parser.on("-f", "--from=SOURCE_DUMP_PATH",
                  "File or directory of the original (source) dump") do |path|
          self.source_dump_path = path
        end
        parser.on("-t", "--to=DESTINATION_DUMP_PATH",
                  "File or directory of the cleaned (destination) dump") do |path|
          self.destination_dump_path = path
        end
        parser.on("-c", "--config=CONFIG_FILE", "Configuration file path") do |path|
          self.config_file = path
        end
      end

      option_parser.parse!(argv)
    end

    # Both dump paths are mandatory; an empty string is treated as missing
    # because it can never name a dump file or directory.
    def validate
      return if [source_dump_path, destination_dump_path].none? { |path| path.to_s.empty? }

      raise ArgumentError, "Missing source or destination dump file or directory, " \
                           "please use -f and -t options. Use -h for help."
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module DumpCleaner
  # Drives a single cleanup run: picks the cleaner class for the configured
  # dump format, runs its pre_cleanup / clean / post_cleanup phases in order
  # and logs the elapsed time.
  class Processor
    attr_reader :options

    # @param options [#config_file] parsed command-line options; the object is
    #   also handed to the cleaner as-is
    def initialize(options)
      @options = options
    end

    # Runs the whole cleanup.
    #
    # @raise [Config::ConfigurationError] when the configured dump format is
    #   not supported
    def run
      started_at = monotonic_now

      selected_class = cleaner_class
      Log.debug { "Starting cleanup with #{selected_class}…" }
      cleaner = selected_class.new(config:, options:)
      cleaner.pre_cleanup
      cleaner.clean
      cleaner.post_cleanup

      elapsed = monotonic_now - started_at
      Log.info { "Finished in #{elapsed.div(60)}m #{(elapsed % 60).to_i}s." }
    end

    private

    # Configuration built lazily from the file named in the options.
    def config
      @config ||= Config.new(options.config_file)
    end

    # Maps the configured dump format to the cleaner class implementing it.
    def cleaner_class
      case config.dump_format
      when "mysql_shell"
        Cleaners::MysqlShellDumpCleaner
      else
        raise Config::ConfigurationError, "Unsupported dump format #{config.dump_format}"
      end
    end

    # Monotonic clock reading in seconds; unlike Time.now it cannot jump when
    # the system clock is adjusted, so durations are never negative or skewed.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module DumpCleaner
  # Version string of the dump_cleaner gem.
  VERSION = "0.5.0"
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "zeitwerk"
4
+ loader = Zeitwerk::Loader.for_gem
5
+ loader.setup
6
+
7
module DumpCleaner
  # Gem-level error class, a direct subclass of StandardError.
  Error = Class.new(StandardError)
end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dump_cleaner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Matouš Borák
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-06-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: zeitwerk
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.6'
27
+ description: Deterministically anonymizes data in logical database dumps. Useful for
28
+ importing (anonymized) production data into development environments.
29
+ email:
30
+ - matous.borak@nejremeslnici.cz
31
+ executables:
32
+ - dump_cleaner
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - ".rspec"
37
+ - ".rubocop.yml"
38
+ - CHANGELOG.md
39
+ - LICENSE.txt
40
+ - README.md
41
+ - Rakefile
42
+ - doc/workflow_steps.md
43
+ - dump_cleaner.gemspec
44
+ - exe/dump_cleaner
45
+ - lib/dump_cleaner.rb
46
+ - lib/dump_cleaner/cleaners/base_cleaner.rb
47
+ - lib/dump_cleaner/cleaners/mysql_shell_dump_cleaner.rb
48
+ - lib/dump_cleaner/cleaners/mysql_shell_dump_helpers.rb
49
+ - lib/dump_cleaner/cleaners/mysql_shell_table_cleaner.rb
50
+ - lib/dump_cleaner/cleanup/bytesize_helpers.rb
51
+ - lib/dump_cleaner/cleanup/cleaning.rb
52
+ - lib/dump_cleaner/cleanup/cleaning_steps/add_repetition_suffix.rb
53
+ - lib/dump_cleaner/cleanup/cleaning_steps/base.rb
54
+ - lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string.rb
55
+ - lib/dump_cleaner/cleanup/cleaning_steps/generate_random_string.rb
56
+ - lib/dump_cleaner/cleanup/cleaning_steps/inspect_context.rb
57
+ - lib/dump_cleaner/cleanup/cleaning_steps/randomize_email.rb
58
+ - lib/dump_cleaner/cleanup/cleaning_steps/randomize_formatted_number.rb
59
+ - lib/dump_cleaner/cleanup/cleaning_steps/randomize_number.rb
60
+ - lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_bytesize.rb
61
+ - lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_pattern.rb
62
+ - lib/dump_cleaner/cleanup/cleaning_steps/take_sample.rb
63
+ - lib/dump_cleaner/cleanup/data_source.rb
64
+ - lib/dump_cleaner/cleanup/data_source_steps/base.rb
65
+ - lib/dump_cleaner/cleanup/data_source_steps/group_by_bytesize.rb
66
+ - lib/dump_cleaner/cleanup/data_source_steps/inspect_context.rb
67
+ - lib/dump_cleaner/cleanup/data_source_steps/load_yaml_file.rb
68
+ - lib/dump_cleaner/cleanup/data_source_steps/remove_accents.rb
69
+ - lib/dump_cleaner/cleanup/inspection.rb
70
+ - lib/dump_cleaner/cleanup/step_context.rb
71
+ - lib/dump_cleaner/cleanup/uniqueness.rb
72
+ - lib/dump_cleaner/cleanup/workflow.rb
73
+ - lib/dump_cleaner/conditions.rb
74
+ - lib/dump_cleaner/config.rb
75
+ - lib/dump_cleaner/log.rb
76
+ - lib/dump_cleaner/options.rb
77
+ - lib/dump_cleaner/processor.rb
78
+ - lib/dump_cleaner/version.rb
79
+ homepage: https://github.com/NejRemeslnici/dump-cleaner
80
+ licenses:
81
+ - MIT
82
+ metadata:
83
+ homepage_uri: https://github.com/NejRemeslnici/dump-cleaner
84
+ source_code_uri: https://github.com/NejRemeslnici/dump-cleaner
85
+ changelog_uri: https://github.com/NejRemeslnici/dump-cleaner/blob/main/CHANGELOG.md
86
+ post_install_message:
87
+ rdoc_options: []
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: 3.1.0
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ requirements: []
101
+ rubygems_version: 3.5.3
102
+ signing_key:
103
+ specification_version: 4
104
+ summary: Anonymizes data in logical database dumps.
105
+ test_files: []