dump_cleaner 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +2 -0
  3. data/.rubocop.yml +25 -0
  4. data/CHANGELOG.md +5 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +295 -0
  7. data/Rakefile +8 -0
  8. data/doc/workflow_steps.md +1400 -0
  9. data/dump_cleaner.gemspec +38 -0
  10. data/exe/dump_cleaner +7 -0
  11. data/lib/dump_cleaner/cleaners/base_cleaner.rb +32 -0
  12. data/lib/dump_cleaner/cleaners/mysql_shell_dump_cleaner.rb +47 -0
  13. data/lib/dump_cleaner/cleaners/mysql_shell_dump_helpers.rb +11 -0
  14. data/lib/dump_cleaner/cleaners/mysql_shell_table_cleaner.rb +184 -0
  15. data/lib/dump_cleaner/cleanup/bytesize_helpers.rb +39 -0
  16. data/lib/dump_cleaner/cleanup/cleaning.rb +69 -0
  17. data/lib/dump_cleaner/cleanup/cleaning_steps/add_repetition_suffix.rb +23 -0
  18. data/lib/dump_cleaner/cleanup/cleaning_steps/base.rb +33 -0
  19. data/lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string.rb +20 -0
  20. data/lib/dump_cleaner/cleanup/cleaning_steps/generate_random_string.rb +37 -0
  21. data/lib/dump_cleaner/cleanup/cleaning_steps/inspect_context.rb +16 -0
  22. data/lib/dump_cleaner/cleanup/cleaning_steps/randomize_email.rb +78 -0
  23. data/lib/dump_cleaner/cleanup/cleaning_steps/randomize_formatted_number.rb +63 -0
  24. data/lib/dump_cleaner/cleanup/cleaning_steps/randomize_number.rb +29 -0
  25. data/lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_bytesize.rb +17 -0
  26. data/lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_pattern.rb +20 -0
  27. data/lib/dump_cleaner/cleanup/cleaning_steps/take_sample.rb +28 -0
  28. data/lib/dump_cleaner/cleanup/data_source.rb +19 -0
  29. data/lib/dump_cleaner/cleanup/data_source_steps/base.rb +26 -0
  30. data/lib/dump_cleaner/cleanup/data_source_steps/group_by_bytesize.rb +37 -0
  31. data/lib/dump_cleaner/cleanup/data_source_steps/inspect_context.rb +16 -0
  32. data/lib/dump_cleaner/cleanup/data_source_steps/load_yaml_file.rb +24 -0
  33. data/lib/dump_cleaner/cleanup/data_source_steps/remove_accents.rb +29 -0
  34. data/lib/dump_cleaner/cleanup/inspection.rb +37 -0
  35. data/lib/dump_cleaner/cleanup/step_context.rb +46 -0
  36. data/lib/dump_cleaner/cleanup/uniqueness.rb +66 -0
  37. data/lib/dump_cleaner/cleanup/workflow.rb +38 -0
  38. data/lib/dump_cleaner/conditions.rb +42 -0
  39. data/lib/dump_cleaner/config.rb +109 -0
  40. data/lib/dump_cleaner/log.rb +42 -0
  41. data/lib/dump_cleaner/options.rb +46 -0
  42. data/lib/dump_cleaner/processor.rb +37 -0
  43. data/lib/dump_cleaner/version.rb +5 -0
  44. data/lib/dump_cleaner.rb +10 -0
  45. metadata +105 -0
@@ -0,0 +1,42 @@
1
# frozen_string_literal: true

require "logger"
require "singleton"

module DumpCleaner
  # Process-wide logger of the gem.
  #
  # A singleton +Logger+ subclass writing to whatever +$stdout+ is at the time
  # the instance is first created. The level is initialised to INFO and each
  # entry is formatted as "<datetime> <severity>: <message>\n".
  #
  # The class-level shortcuts (+Log.info+ etc.) delegate to the singleton
  # instance so callers do not need to write +Log.instance+.
  class Log < ::Logger
    include Singleton

    # NOTE(review): nothing in this class assigns @logger, so this reader
    # returns nil. It is kept only so the public interface stays unchanged.
    attr_reader :logger

    def initialize
      super($stdout)

      init_log_level
      self.formatter = ->(severity, datetime, _progname, msg) { "#{datetime} #{severity}: #{msg}\n" }
    end

    # Sets the logging threshold to INFO (debug entries are dropped).
    def init_log_level
      self.level = Logger::INFO
    end

    # Class-level shortcuts mirroring the +Logger+ severity methods. Like the
    # instance methods they accept either a message argument or a block that
    # lazily builds the message; the block form is only evaluated when the
    # severity passes the current level.
    class << self
      # @param message [String, nil] message to log (optional when a block is given)
      def debug(message = nil, &block)
        instance.debug(message, &block)
      end

      # @param message [String, nil] message to log (optional when a block is given)
      def info(message = nil, &block)
        instance.info(message, &block)
      end

      # @param message [String, nil] message to log (optional when a block is given)
      def warn(message = nil, &block)
        instance.warn(message, &block)
      end

      # @param message [String, nil] message to log (optional when a block is given)
      def error(message = nil, &block)
        instance.error(message, &block)
      end

      # @param message [String, nil] message to log (optional when a block is given)
      def fatal(message = nil, &block)
        instance.fatal(message, &block)
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
# frozen_string_literal: true

require "optparse"

module DumpCleaner
  # Command-line options of the +dump_cleaner+ executable.
  #
  # Parses +argv+ on construction and exposes the resulting values through
  # accessors. Options not given on the command line fall back to
  # DEFAULT_OPTIONS.
  class Options
    DEFAULT_OPTIONS = {
      config_file: "config/dump_cleaner.yml"
    }.freeze

    attr_accessor :source_dump_path, :destination_dump_path, :config_file

    # @param argv [Array<String>] command-line arguments; recognised switches
    #   are removed from the array by OptionParser#parse!
    # @raise [ArgumentError] when the source or destination path is missing or empty
    def initialize(argv)
      DEFAULT_OPTIONS.each { |k, v| send(:"#{k}=", v) }
      parse(argv)
      validate
    end

    private

    # Declares the supported switches and stores the parsed values on self.
    def parse(argv)
      OptionParser.new do |parser|
        parser.banner = "Usage: dump_cleaner -f source_dump -t cleaned_dump [options]"

        parser.on("-f", "--from=SOURCE_DUMP_PATH",
                  "File or directory of the original (source) dump") do |option|
          self.source_dump_path = option
        end
        parser.on("-t", "--to=DESTINATION_DUMP_PATH",
                  "File or directory of the cleaned (destination) dump") do |option|
          self.destination_dump_path = option
        end
        parser.on("-c", "--config=CONFIG_FILE", "Configuration file path") do |option|
          self.config_file = option
        end
      end.parse!(argv)
    end

    # Ensures both dump paths were supplied. An empty string is treated the
    # same as a missing value because it cannot name a dump file or directory.
    def validate
      paths = [source_dump_path, destination_dump_path]
      return if paths.none? { |path| path.to_s.empty? }

      raise ArgumentError, "Missing source or destination dump file or directory, " \
                           "please use -f and -t options. Use -h for help."
    end
  end
end
@@ -0,0 +1,37 @@
1
# frozen_string_literal: true

module DumpCleaner
  # Drives one cleanup run: picks the cleaner for the configured dump format,
  # runs its three phases in order and logs how long the run took.
  class Processor
    attr_reader :options

    # @param options [#config_file] parsed options; +config_file+ is handed to Config.new
    def initialize(options)
      @options = options
    end

    # Runs pre_cleanup, clean and post_cleanup on the selected cleaner.
    #
    # @raise [Config::ConfigurationError] when the configured dump format is not supported
    def run
      started_at = monotonic_now

      cleaner_class = cleaner_class_for(config.dump_format)

      Log.debug { "Starting cleanup with #{cleaner_class}…" }
      cleaner = cleaner_class.new(config:, options:)
      cleaner.pre_cleanup
      cleaner.clean
      cleaner.post_cleanup

      minutes, seconds = (monotonic_now - started_at).divmod(60)
      Log.info { "Finished in #{minutes}m #{seconds.to_i}s." }
    end

    private

    # Lazily builds the configuration from the options' config file.
    def config
      @config ||= Config.new(options.config_file)
    end

    # Maps a dump format name to the cleaner class handling it.
    def cleaner_class_for(dump_format)
      case dump_format
      when "mysql_shell"
        Cleaners::MysqlShellDumpCleaner
      else
        raise Config::ConfigurationError, "Unsupported dump format #{dump_format}"
      end
    end

    # Seconds from the monotonic clock; unlike Time.now it is not affected by
    # wall-clock adjustments, so differences are reliable durations.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end
end
@@ -0,0 +1,5 @@
1
# frozen_string_literal: true

# Top-level namespace of the dump_cleaner gem.
module DumpCleaner
  # Version string of this gem release.
  VERSION = "0.5.0"
end
@@ -0,0 +1,10 @@
1
# frozen_string_literal: true

require "zeitwerk"

# Set up Zeitwerk's gem loader for this file before the namespace is opened.
Zeitwerk::Loader.for_gem.setup

# Top-level namespace of the dump_cleaner gem.
module DumpCleaner
  # Gem-specific error class derived from StandardError.
  class Error < StandardError; end
end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dump_cleaner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Matouš Borák
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-06-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: zeitwerk
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.6'
27
+ description: Deterministically anonymizes data in logical database dumps. Useful for
28
+ importing (anonymized) production data into development environments.
29
+ email:
30
+ - matous.borak@nejremeslnici.cz
31
+ executables:
32
+ - dump_cleaner
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - ".rspec"
37
+ - ".rubocop.yml"
38
+ - CHANGELOG.md
39
+ - LICENSE.txt
40
+ - README.md
41
+ - Rakefile
42
+ - doc/workflow_steps.md
43
+ - dump_cleaner.gemspec
44
+ - exe/dump_cleaner
45
+ - lib/dump_cleaner.rb
46
+ - lib/dump_cleaner/cleaners/base_cleaner.rb
47
+ - lib/dump_cleaner/cleaners/mysql_shell_dump_cleaner.rb
48
+ - lib/dump_cleaner/cleaners/mysql_shell_dump_helpers.rb
49
+ - lib/dump_cleaner/cleaners/mysql_shell_table_cleaner.rb
50
+ - lib/dump_cleaner/cleanup/bytesize_helpers.rb
51
+ - lib/dump_cleaner/cleanup/cleaning.rb
52
+ - lib/dump_cleaner/cleanup/cleaning_steps/add_repetition_suffix.rb
53
+ - lib/dump_cleaner/cleanup/cleaning_steps/base.rb
54
+ - lib/dump_cleaner/cleanup/cleaning_steps/fill_up_with_string.rb
55
+ - lib/dump_cleaner/cleanup/cleaning_steps/generate_random_string.rb
56
+ - lib/dump_cleaner/cleanup/cleaning_steps/inspect_context.rb
57
+ - lib/dump_cleaner/cleanup/cleaning_steps/randomize_email.rb
58
+ - lib/dump_cleaner/cleanup/cleaning_steps/randomize_formatted_number.rb
59
+ - lib/dump_cleaner/cleanup/cleaning_steps/randomize_number.rb
60
+ - lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_bytesize.rb
61
+ - lib/dump_cleaner/cleanup/cleaning_steps/select_data_by_pattern.rb
62
+ - lib/dump_cleaner/cleanup/cleaning_steps/take_sample.rb
63
+ - lib/dump_cleaner/cleanup/data_source.rb
64
+ - lib/dump_cleaner/cleanup/data_source_steps/base.rb
65
+ - lib/dump_cleaner/cleanup/data_source_steps/group_by_bytesize.rb
66
+ - lib/dump_cleaner/cleanup/data_source_steps/inspect_context.rb
67
+ - lib/dump_cleaner/cleanup/data_source_steps/load_yaml_file.rb
68
+ - lib/dump_cleaner/cleanup/data_source_steps/remove_accents.rb
69
+ - lib/dump_cleaner/cleanup/inspection.rb
70
+ - lib/dump_cleaner/cleanup/step_context.rb
71
+ - lib/dump_cleaner/cleanup/uniqueness.rb
72
+ - lib/dump_cleaner/cleanup/workflow.rb
73
+ - lib/dump_cleaner/conditions.rb
74
+ - lib/dump_cleaner/config.rb
75
+ - lib/dump_cleaner/log.rb
76
+ - lib/dump_cleaner/options.rb
77
+ - lib/dump_cleaner/processor.rb
78
+ - lib/dump_cleaner/version.rb
79
+ homepage: https://github.com/NejRemeslnici/dump-cleaner
80
+ licenses:
81
+ - MIT
82
+ metadata:
83
+ homepage_uri: https://github.com/NejRemeslnici/dump-cleaner
84
+ source_code_uri: https://github.com/NejRemeslnici/dump-cleaner
85
+ changelog_uri: https://github.com/NejRemeslnici/dump-cleaner/blob/main/CHANGELOG.md
86
+ post_install_message:
87
+ rdoc_options: []
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: 3.1.0
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ requirements: []
101
+ rubygems_version: 3.5.3
102
+ signing_key:
103
+ specification_version: 4
104
+ summary: Anonymizes data in logical database dumps.
105
+ test_files: []