data_taster 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,263 @@
1
+ PATH
2
+ remote: ../../rubocop-powerhome
3
+ specs:
4
+ rubocop-powerhome (0.5.2)
5
+ rubocop (~> 1.52.0)
6
+ rubocop-performance
7
+ rubocop-rails
8
+ rubocop-rake
9
+ rubocop-rspec
10
+
11
+ PATH
12
+ remote: ..
13
+ specs:
14
+ data_taster (0.2.2)
15
+ rails (>= 6.0)
16
+
17
+ GEM
18
+ remote: https://rubygems.org/
19
+ specs:
20
+ actioncable (7.0.6)
21
+ actionpack (= 7.0.6)
22
+ activesupport (= 7.0.6)
23
+ nio4r (~> 2.0)
24
+ websocket-driver (>= 0.6.1)
25
+ actionmailbox (7.0.6)
26
+ actionpack (= 7.0.6)
27
+ activejob (= 7.0.6)
28
+ activerecord (= 7.0.6)
29
+ activestorage (= 7.0.6)
30
+ activesupport (= 7.0.6)
31
+ mail (>= 2.7.1)
32
+ net-imap
33
+ net-pop
34
+ net-smtp
35
+ actionmailer (7.0.6)
36
+ actionpack (= 7.0.6)
37
+ actionview (= 7.0.6)
38
+ activejob (= 7.0.6)
39
+ activesupport (= 7.0.6)
40
+ mail (~> 2.5, >= 2.5.4)
41
+ net-imap
42
+ net-pop
43
+ net-smtp
44
+ rails-dom-testing (~> 2.0)
45
+ actionpack (7.0.6)
46
+ actionview (= 7.0.6)
47
+ activesupport (= 7.0.6)
48
+ rack (~> 2.0, >= 2.2.4)
49
+ rack-test (>= 0.6.3)
50
+ rails-dom-testing (~> 2.0)
51
+ rails-html-sanitizer (~> 1.0, >= 1.2.0)
52
+ actiontext (7.0.6)
53
+ actionpack (= 7.0.6)
54
+ activerecord (= 7.0.6)
55
+ activestorage (= 7.0.6)
56
+ activesupport (= 7.0.6)
57
+ globalid (>= 0.6.0)
58
+ nokogiri (>= 1.8.5)
59
+ actionview (7.0.6)
60
+ activesupport (= 7.0.6)
61
+ builder (~> 3.1)
62
+ erubi (~> 1.4)
63
+ rails-dom-testing (~> 2.0)
64
+ rails-html-sanitizer (~> 1.1, >= 1.2.0)
65
+ activejob (7.0.6)
66
+ activesupport (= 7.0.6)
67
+ globalid (>= 0.3.6)
68
+ activemodel (7.0.6)
69
+ activesupport (= 7.0.6)
70
+ activerecord (7.0.6)
71
+ activemodel (= 7.0.6)
72
+ activesupport (= 7.0.6)
73
+ activestorage (7.0.6)
74
+ actionpack (= 7.0.6)
75
+ activejob (= 7.0.6)
76
+ activerecord (= 7.0.6)
77
+ activesupport (= 7.0.6)
78
+ marcel (~> 1.0)
79
+ mini_mime (>= 1.1.0)
80
+ activesupport (7.0.6)
81
+ concurrent-ruby (~> 1.0, >= 1.0.2)
82
+ i18n (>= 1.6, < 2)
83
+ minitest (>= 5.1)
84
+ tzinfo (~> 2.0)
85
+ appraisal (2.5.0)
86
+ bundler
87
+ rake
88
+ thor (>= 0.14.0)
89
+ ast (2.4.2)
90
+ builder (3.2.4)
91
+ concurrent-ruby (1.2.2)
92
+ crass (1.0.6)
93
+ date (3.3.4)
94
+ diff-lcs (1.5.0)
95
+ docile (1.1.5)
96
+ erubi (1.12.0)
97
+ globalid (1.2.1)
98
+ activesupport (>= 6.1)
99
+ i18n (1.14.1)
100
+ concurrent-ruby (~> 1.0)
101
+ json (2.7.1)
102
+ license_finder (7.1.0)
103
+ bundler
104
+ rubyzip (>= 1, < 3)
105
+ thor (~> 1.2)
106
+ tomlrb (>= 1.3, < 2.1)
107
+ with_env (= 1.1.0)
108
+ xml-simple (~> 1.1.9)
109
+ loofah (2.22.0)
110
+ crass (~> 1.0.2)
111
+ nokogiri (>= 1.12.0)
112
+ mail (2.8.1)
113
+ mini_mime (>= 0.1.1)
114
+ net-imap
115
+ net-pop
116
+ net-smtp
117
+ marcel (1.0.2)
118
+ method_source (1.0.0)
119
+ mini_mime (1.1.5)
120
+ minitest (5.20.0)
121
+ net-imap (0.4.9.1)
122
+ date
123
+ net-protocol
124
+ net-pop (0.1.2)
125
+ net-protocol
126
+ net-protocol (0.2.2)
127
+ timeout
128
+ net-smtp (0.4.0.1)
129
+ net-protocol
130
+ nio4r (2.7.0)
131
+ nokogiri (1.15.5-arm64-darwin)
132
+ racc (~> 1.4)
133
+ parallel (1.24.0)
134
+ parser (3.2.2.4)
135
+ ast (~> 2.4.1)
136
+ racc
137
+ power_assert (2.0.3)
138
+ racc (1.7.3)
139
+ rack (2.2.8)
140
+ rack-test (2.1.0)
141
+ rack (>= 1.3)
142
+ rails (7.0.6)
143
+ actioncable (= 7.0.6)
144
+ actionmailbox (= 7.0.6)
145
+ actionmailer (= 7.0.6)
146
+ actionpack (= 7.0.6)
147
+ actiontext (= 7.0.6)
148
+ actionview (= 7.0.6)
149
+ activejob (= 7.0.6)
150
+ activemodel (= 7.0.6)
151
+ activerecord (= 7.0.6)
152
+ activestorage (= 7.0.6)
153
+ activesupport (= 7.0.6)
154
+ bundler (>= 1.15.0)
155
+ railties (= 7.0.6)
156
+ rails-dom-testing (2.2.0)
157
+ activesupport (>= 5.0.0)
158
+ minitest
159
+ nokogiri (>= 1.6)
160
+ rails-html-sanitizer (1.6.0)
161
+ loofah (~> 2.21)
162
+ nokogiri (~> 1.14)
163
+ railties (7.0.6)
164
+ actionpack (= 7.0.6)
165
+ activesupport (= 7.0.6)
166
+ method_source
167
+ rake (>= 12.2)
168
+ thor (~> 1.0)
169
+ zeitwerk (~> 2.5)
170
+ rainbow (2.2.2)
171
+ rake
172
+ rake (13.1.0)
173
+ regexp_parser (2.8.3)
174
+ rexml (3.2.6)
175
+ rspec (3.12.0)
176
+ rspec-core (~> 3.12.0)
177
+ rspec-expectations (~> 3.12.0)
178
+ rspec-mocks (~> 3.12.0)
179
+ rspec-core (3.12.2)
180
+ rspec-support (~> 3.12.0)
181
+ rspec-expectations (3.12.3)
182
+ diff-lcs (>= 1.2.0, < 2.0)
183
+ rspec-support (~> 3.12.0)
184
+ rspec-mocks (3.12.6)
185
+ diff-lcs (>= 1.2.0, < 2.0)
186
+ rspec-support (~> 3.12.0)
187
+ rspec-support (3.12.1)
188
+ rubocop (1.52.1)
189
+ json (~> 2.3)
190
+ parallel (~> 1.10)
191
+ parser (>= 3.2.2.3)
192
+ rainbow (>= 2.2.2, < 4.0)
193
+ regexp_parser (>= 1.8, < 3.0)
194
+ rexml (>= 3.2.5, < 4.0)
195
+ rubocop-ast (>= 1.28.0, < 2.0)
196
+ ruby-progressbar (~> 1.7)
197
+ unicode-display_width (>= 2.4.0, < 3.0)
198
+ rubocop-ast (1.30.0)
199
+ parser (>= 3.2.1.0)
200
+ rubocop-capybara (2.20.0)
201
+ rubocop (~> 1.41)
202
+ rubocop-factory_bot (2.24.0)
203
+ rubocop (~> 1.33)
204
+ rubocop-performance (1.20.1)
205
+ rubocop (>= 1.48.1, < 2.0)
206
+ rubocop-ast (>= 1.30.0, < 2.0)
207
+ rubocop-rails (2.23.1)
208
+ activesupport (>= 4.2.0)
209
+ rack (>= 1.1)
210
+ rubocop (>= 1.33.0, < 2.0)
211
+ rubocop-ast (>= 1.30.0, < 2.0)
212
+ rubocop-rake (0.6.0)
213
+ rubocop (~> 1.0)
214
+ rubocop-rspec (2.25.0)
215
+ rubocop (~> 1.40)
216
+ rubocop-capybara (~> 2.17)
217
+ rubocop-factory_bot (~> 2.22)
218
+ ruby-progressbar (1.13.0)
219
+ rubyzip (2.3.2)
220
+ simplecov (0.15.1)
221
+ docile (~> 1.1.0)
222
+ json (>= 1.8, < 3)
223
+ simplecov-html (~> 0.10.0)
224
+ simplecov-html (0.10.2)
225
+ test-unit (3.1.5)
226
+ power_assert
227
+ thor (1.3.0)
228
+ timeout (0.4.1)
229
+ tomlrb (2.0.3)
230
+ tzinfo (2.0.6)
231
+ concurrent-ruby (~> 1.0)
232
+ unicode-display_width (2.5.0)
233
+ websocket-driver (0.7.6)
234
+ websocket-extensions (>= 0.1.0)
235
+ websocket-extensions (0.1.5)
236
+ with_env (1.1.0)
237
+ xml-simple (1.1.9)
238
+ rexml
239
+ yard (0.9.34)
240
+ zeitwerk (2.6.12)
241
+
242
+ PLATFORMS
243
+ arm64-darwin-22
244
+ arm64-darwin-23
245
+
246
+ DEPENDENCIES
247
+ appraisal (= 2.5.0)
248
+ bundler (~> 2.1)
249
+ data_taster!
250
+ license_finder (~> 7.0)
251
+ nokogiri (< 1.16)
252
+ parser (>= 2.5, != 2.5.1.1)
253
+ rails (= 7.0.6)
254
+ rainbow (= 2.2.2)
255
+ rake (~> 13.0)
256
+ rspec (~> 3.0)
257
+ rubocop-powerhome!
258
+ simplecov (= 0.15.1)
259
+ test-unit (= 3.1.5)
260
+ yard (= 0.9.34)
261
+
262
+ BUNDLED WITH
263
+ 2.4.22
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
module DataTaster
  # Turns the processed yml configuration for one table into an export
  # recipe. #assemble returns either
  #   * an empty hash (for tables that are skippable), or
  #   * a hash that contains:
  #       select:   the SQL built around the configured WHERE clause
  #       sanitize: the columns and values that need custom sanitization
  class Collection
    # Reads the table's entry from DataTaster.confection and the
    # include_insert flag from DataTaster.config.
    def initialize(table_name)
      @table_name = table_name
      @ingredients = DataTaster.confection[table_name]
      @include_insert = DataTaster.config.include_insert
    end

    # Logs the table name, then returns {} for a skippable table or the
    # { select:, sanitize: } hash for every other table.
    def assemble
      DataTaster.logger.info("#{table_name}...")
      return { select: selection, sanitize: sanitization } unless skippable?

      DataTaster.logger.info("configured to skip both schema and data")
      {}
    end

    private

    attr_reader :table_name, :ingredients, :include_insert

    # True when the downcased table name matches /^_/ or when the table
    # is configured with DataTaster::SKIP_CODE.
    def skippable?
      return true if table_name.downcase.match(/^_/)

      ingredients == DataTaster::SKIP_CODE
    end

    # Builds (and logs) the SELECT statement for the table, prefixed with
    # an INSERT INTO the working database when include_insert is set.
    def selection
      fragments = [
        include_insert ? "INSERT INTO #{working_db}.#{table_name}" : "",
        "SELECT * FROM #{source_db}.#{table_name}",
        "WHERE #{where_clause}",
      ]
      statement = fragments.join("\n").squish

      DataTaster.logger.info(statement)
      statement
    end

    # The yml entry is either a bare clause or a hash carrying the clause
    # under "select". When no clause is configured we fall back to one
    # that selects nothing.
    def where_clause
      configured =
        case ingredients
        when Hash then ingredients["select"]
        else ingredients
        end

      configured || "1 = 0"
    end

    # Sanitization rules only exist for hash-style entries; anything else
    # yields nil.
    def sanitization
      ingredients["sanitize"] if ingredients.is_a?(Hash)
    end

    # Database name of the configured source client (memoized).
    def source_db
      @source_db ||= DataTaster.config.source_client.query_options[:database]
    end

    # Database name of the configured working client (memoized).
    def working_db
      @working_db ||= DataTaster.config.working_client.query_options[:database]
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "erb"
4
+ require "socket"
5
+ require "yaml"
6
+ require "data_taster/flavors"
7
+
8
module DataTaster
  # Reads every configured data_taster_export_tables.yml file, renders it
  # through an erb template and merges the results into one ruby hash.
  class Confection
    # Starts from the default data and merges in each file listed in
    # DataTaster.config.list, in order; later files win on duplicate keys.
    def assemble
      recipe = default_data
      DataTaster.config.list.each do |path|
        recipe.merge!(load_yml(path.to_s))
      end
      recipe
    end

    # Renders +filename+ as erb, with the helpers of DataTaster::Flavors
    # available to the template, and parses the result as yaml.
    # Returns {} when the file does not exist or the document is empty.
    def load_yml(filename)
      return {} unless File.exist?(filename)

      template = ::ERB.new(File.read(filename))
      template.filename = filename
      rendered = template.def_class(DataTaster::Flavors, "render()").new.render

      # Everything up to and including the last "---" is collapsed into a
      # single document marker before parsing.
      document = rendered.gsub(/((.|\n)*---)/, "\n---")

      YAML.safe_load(document) || {}
    end

    private

    # Entries that are always part of the assembled hash.
    def default_data
      { "schema_migrations" => "1 = 1" }
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
module DataTaster
  # Produces the UPDATE statement that overwrites one column of one table
  # in the working database. The shape of the statement depends on the
  # type of the configured value. Used for sanitizing inputs.
  class Detergent
    # A value matching any of these is written into the SQL unquoted.
    SANITIZE_FUNCTIONS = [
      /CONCAT/,
      /DATE/,
      /DAY/,
      /FORMAT/,
      /LOWER/,
      /REPLACE/,
      /TRIM/,
      /UCASE/,
      /UPPER/,
    ].freeze

    def initialize(table_name, column_name, given_value)
      @table_name = table_name
      @column_name = column_name
      @value = parse_value(given_value)
    end

    # Returns the skip code untouched when the column is configured to be
    # skipped; otherwise builds, logs and returns the UPDATE statement.
    def deliver
      return value if value == DataTaster::SKIP_CODE

      sql_for(value).tap do |statement|
        DataTaster.logger.info("--> #{statement}")
      end
    end

    private

    attr_reader :table_name, :column_name, :value

    # yml files can't hold custom-set dates, so they arrive as strings;
    # any string containing a yyyy-mm-dd sequence is parsed into a Date.
    def parse_value(given_value)
      date_like = given_value.is_a?(String) && given_value.match?(/\d{4}-\d{2}-\d{2}/)

      date_like ? Date.parse(given_value) : given_value
    end

    # Picks the statement flavour, checked in this order: Date, then
    # Numeric or sanitize function, then blank, then everything else.
    def sql_for(value)
      return sql_for_date_value if value.is_a?(Date)
      return sql_for_uncast_value if value.is_a?(Numeric) || sanitize_function?
      return sql_for_nil_value if value.blank?

      sql_for_cast_value
    end

    # True when the value's string form matches one of SANITIZE_FUNCTIONS.
    def sanitize_function?
      SANITIZE_FUNCTIONS.any? { |pattern| value.to_s.match(pattern) }
    end

    # Unquoted value; only touches rows whose current value differs.
    def sql_for_uncast_value
      update_statement(value, "AND #{column_name} <> #{value}")
    end

    # Quoted date written to every non-NULL row.
    def sql_for_date_value
      update_statement("'#{value}'")
    end

    # Blanks the column out for every non-NULL row.
    def sql_for_nil_value
      update_statement("NULL")
    end

    # Quoted value; rows holding an empty string are left alone.
    def sql_for_cast_value
      update_statement("'#{value}'", "AND #{column_name} <> ''")
    end

    # Shared skeleton of every statement above: set the column to
    # +new_value_sql+ on non-NULL rows, optionally narrowed further by
    # +extra_condition+. squish collapses the layout into a single line.
    def update_statement(new_value_sql, extra_condition = nil)
      <<~SQL.squish
        UPDATE #{working_db}.#{table_name}
        SET #{column_name} = #{new_value_sql}
        WHERE #{column_name} IS NOT NULL
        #{extra_condition}
      SQL
    end

    # Database name of the configured working client.
    def working_db
      DataTaster.config.working_client.query_options[:database]
    end
  end
end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
module DataTaster
  # helper methods made to make data_taster_export_tables.yml
  # files more user-friendly
  class Flavors
    include DataTaster::Helper

    # Today's date, memoized so every helper in one instance agrees on it.
    def current_date
      @current_date ||= Date.current
    end

    # Cut-off used by the "recent" helpers, as a db-formatted string:
    # the beginning of the day DataTaster.config.months months ago, or
    # one week ago when no month count is configured. Memoized.
    def date
      @date ||= begin
        months = DataTaster.config.months
        window = months ? months.to_i.months : 1.week

        # to_formatted_s(:db) rather than to_s(:db): passing a format to
        # to_s is deprecated in Rails 7.0 and not accepted by later
        # versions, while to_formatted_s is available throughout.
        (current_date - window).beginning_of_day.to_formatted_s(:db)
      end
    end

    # skips dumping both schema and data
    def deprecated_table
      DataTaster::SKIP_CODE
    end

    # Marks a column as exempt from sanitization.
    def skip_sanitization
      DataTaster::SKIP_CODE
    end

    # Encrypts +value+ (or a column-appropriate default when no value is
    # given) by calling klass.new.encrypt(column, value).
    def encrypt(klass, column, value = nil)
      value_to_encrypt = value || default_value_for(column)

      klass.new.encrypt(column, value_to_encrypt)
    end

    # Placeholder value chosen by pattern-matching the column name.
    def default_value_for(column)
      case column
      when /date_of_birth/, /dob/
        (Date.current - 25.years).strftime("%m/%d/%Y")
      when /ssn/, /license/
        "111111111"
      when /compensation/
        1
      else
        "1"
      end
    end

    # WHERE clause that selects every row.
    def full_table_dump
      "1 = 1"
    end

    # WHERE clause that selects rows created or updated since #date.
    def recent_table_updates
      "created_at >= '#{date}' OR updated_at >= '#{date}'"
    end

    # Subquery returning the distinct +col_name+ values of the rows in
    # the source database's +table_name+ created or updated since #date.
    def recent_ids(table_name, col_name)
      <<~SQL.squish
        (SELECT DISTINCT(#{col_name})
        FROM #{source_db}.#{table_name}
        WHERE
          created_at >= '#{date}'
          OR
          updated_at >= '#{date}')
      SQL
    end

    # Source database name, read from the "database" key of db_config
    # (provided by DataTaster::Helper). Memoized.
    def source_db
      @source_db ||= db_config["database"]
    end
  end
end
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
module DataTaster
  # helpers used globally in DataTaster
  module Helper
    # Returns a copy of +command+ with credentials masked so it can be
    # logged.
    #
    # Replaced, in this order:
    #   * the shell-escaped DEV_DUMP_USER            -> "<username>"
    #   * the shell-escaped DEV_DUMP_PASSWORD        -> "<pwd>"
    #   * the raw DEV_DUMP_PASSWORD                  -> "<pwd>"
    #   * the shell-escaped params["password"], when
    #     +params+ is given                          -> "<pwd>"
    #
    # A secret that is unset or empty is skipped: escaping nil or ""
    # yields "''" (which would rewrite every empty-quote pair in the
    # command), gsub(nil) raises TypeError, and gsub("") would insert
    # the placeholder between every character.
    def sanitize_command(command, params = nil)
      user = ENV.fetch("DEV_DUMP_USER", nil)
      password = ENV.fetch("DEV_DUMP_PASSWORD", nil)
      shell_escaped = ->(secret) { Shellwords.escape(secret) unless secret.to_s.empty? }

      replacements = [
        [shell_escaped.call(user), "<username>"],
        [shell_escaped.call(password), "<pwd>"],
        [password, "<pwd>"],
      ]
      replacements << [shell_escaped.call(params["password"]), "<pwd>"] if params

      # dup so the caller always receives a new string, even when no
      # replacement applies.
      replacements.reduce(command.dup) do |sanitized, (secret, placeholder)|
        secret.to_s.empty? ? sanitized : sanitized.gsub(secret, placeholder)
      end
    end

    # config/database.yml rendered through ERB and parsed as yaml
    # (aliases allowed). Memoized.
    def db_yml
      @db_yml ||= YAML.safe_load(ERB.new(Rails.root.join("config", "database.yml").read).result, aliases: true)
    end

    # The database.yml section for the current Rails environment. Memoized.
    def db_config
      @db_config ||= db_yml[Rails.env]
    end

    # Logs +message+ at debug level, prefixed with the current time.
    def logg(message)
      DataTaster.logger.debug { "[#{Time.current}] #{message}" }
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module DataTaster
  # Selects and processes tables from the source_db
  # to insert (or query) into the working_db
  class Sample
    # Captures the include_insert flag and assembles the table's
    # collection (an empty hash for skippable tables, otherwise a hash
    # with :select and :sanitize).
    def initialize(table_name)
      @table_name = table_name
      @include_insert = DataTaster.config.include_insert
      @collection = DataTaster::Collection.new(table_name).assemble
    end

    # Drops the table when it has no collection and inserts are enabled;
    # otherwise truncates it in the working database, runs the select
    # statement and applies the configured sanitization.
    def serve!
      # Any table that does not return SQL is considered deprecated and we should fully skip it
      if collection.empty? && include_insert
        # NOTE(review): unlike the TRUNCATE below, this name is not
        # qualified with working_db -- confirm that is intended.
        DataTaster.safe_execute("DROP TABLE IF EXISTS #{table_name}")
      else
        # NOTE(review): an empty collection with include_insert disabled
        # also lands here, so collection[:select] is nil -- confirm that
        # safe_execute and Sanitizer are expected to handle that.
        ensure_empty_table
        process_select(collection[:select])
        DataTaster::Sanitizer.new(table_name, collection[:sanitize]).clean!
      end
    end

    private

    attr_reader :table_name, :include_insert, :collection

    # Empties the table in the working database.
    def ensure_empty_table
      DataTaster.safe_execute("TRUNCATE TABLE #{working_db}.#{table_name}")
    end

    # Executes +sql+; on failure re-raises an error of the same class
    # whose message also names the table and the statement.
    def process_select(sql)
      DataTaster.safe_execute(sql)
    rescue => e
      # `raise e, message` asks the exception for a copy carrying the new
      # message instead of appending to e.message in place, which raises
      # FrozenError when the message string is frozen.
      raise e, "#{e.message} executing SQL statement for #{table_name}: #{sql}"
    end

    # Database name of the configured working client (memoized).
    def working_db
      @working_db ||= DataTaster.config.working_client.query_options[:database]
    end
  end
end
+ end