data_taster 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,263 @@
1
+ PATH
2
+ remote: ../../rubocop-powerhome
3
+ specs:
4
+ rubocop-powerhome (0.5.2)
5
+ rubocop (~> 1.52.0)
6
+ rubocop-performance
7
+ rubocop-rails
8
+ rubocop-rake
9
+ rubocop-rspec
10
+
11
+ PATH
12
+ remote: ..
13
+ specs:
14
+ data_taster (0.2.2)
15
+ rails (>= 6.0)
16
+
17
+ GEM
18
+ remote: https://rubygems.org/
19
+ specs:
20
+ actioncable (7.0.6)
21
+ actionpack (= 7.0.6)
22
+ activesupport (= 7.0.6)
23
+ nio4r (~> 2.0)
24
+ websocket-driver (>= 0.6.1)
25
+ actionmailbox (7.0.6)
26
+ actionpack (= 7.0.6)
27
+ activejob (= 7.0.6)
28
+ activerecord (= 7.0.6)
29
+ activestorage (= 7.0.6)
30
+ activesupport (= 7.0.6)
31
+ mail (>= 2.7.1)
32
+ net-imap
33
+ net-pop
34
+ net-smtp
35
+ actionmailer (7.0.6)
36
+ actionpack (= 7.0.6)
37
+ actionview (= 7.0.6)
38
+ activejob (= 7.0.6)
39
+ activesupport (= 7.0.6)
40
+ mail (~> 2.5, >= 2.5.4)
41
+ net-imap
42
+ net-pop
43
+ net-smtp
44
+ rails-dom-testing (~> 2.0)
45
+ actionpack (7.0.6)
46
+ actionview (= 7.0.6)
47
+ activesupport (= 7.0.6)
48
+ rack (~> 2.0, >= 2.2.4)
49
+ rack-test (>= 0.6.3)
50
+ rails-dom-testing (~> 2.0)
51
+ rails-html-sanitizer (~> 1.0, >= 1.2.0)
52
+ actiontext (7.0.6)
53
+ actionpack (= 7.0.6)
54
+ activerecord (= 7.0.6)
55
+ activestorage (= 7.0.6)
56
+ activesupport (= 7.0.6)
57
+ globalid (>= 0.6.0)
58
+ nokogiri (>= 1.8.5)
59
+ actionview (7.0.6)
60
+ activesupport (= 7.0.6)
61
+ builder (~> 3.1)
62
+ erubi (~> 1.4)
63
+ rails-dom-testing (~> 2.0)
64
+ rails-html-sanitizer (~> 1.1, >= 1.2.0)
65
+ activejob (7.0.6)
66
+ activesupport (= 7.0.6)
67
+ globalid (>= 0.3.6)
68
+ activemodel (7.0.6)
69
+ activesupport (= 7.0.6)
70
+ activerecord (7.0.6)
71
+ activemodel (= 7.0.6)
72
+ activesupport (= 7.0.6)
73
+ activestorage (7.0.6)
74
+ actionpack (= 7.0.6)
75
+ activejob (= 7.0.6)
76
+ activerecord (= 7.0.6)
77
+ activesupport (= 7.0.6)
78
+ marcel (~> 1.0)
79
+ mini_mime (>= 1.1.0)
80
+ activesupport (7.0.6)
81
+ concurrent-ruby (~> 1.0, >= 1.0.2)
82
+ i18n (>= 1.6, < 2)
83
+ minitest (>= 5.1)
84
+ tzinfo (~> 2.0)
85
+ appraisal (2.5.0)
86
+ bundler
87
+ rake
88
+ thor (>= 0.14.0)
89
+ ast (2.4.2)
90
+ builder (3.2.4)
91
+ concurrent-ruby (1.2.2)
92
+ crass (1.0.6)
93
+ date (3.3.4)
94
+ diff-lcs (1.5.0)
95
+ docile (1.1.5)
96
+ erubi (1.12.0)
97
+ globalid (1.2.1)
98
+ activesupport (>= 6.1)
99
+ i18n (1.14.1)
100
+ concurrent-ruby (~> 1.0)
101
+ json (2.7.1)
102
+ license_finder (7.1.0)
103
+ bundler
104
+ rubyzip (>= 1, < 3)
105
+ thor (~> 1.2)
106
+ tomlrb (>= 1.3, < 2.1)
107
+ with_env (= 1.1.0)
108
+ xml-simple (~> 1.1.9)
109
+ loofah (2.22.0)
110
+ crass (~> 1.0.2)
111
+ nokogiri (>= 1.12.0)
112
+ mail (2.8.1)
113
+ mini_mime (>= 0.1.1)
114
+ net-imap
115
+ net-pop
116
+ net-smtp
117
+ marcel (1.0.2)
118
+ method_source (1.0.0)
119
+ mini_mime (1.1.5)
120
+ minitest (5.20.0)
121
+ net-imap (0.4.9.1)
122
+ date
123
+ net-protocol
124
+ net-pop (0.1.2)
125
+ net-protocol
126
+ net-protocol (0.2.2)
127
+ timeout
128
+ net-smtp (0.4.0.1)
129
+ net-protocol
130
+ nio4r (2.7.0)
131
+ nokogiri (1.15.5-arm64-darwin)
132
+ racc (~> 1.4)
133
+ parallel (1.24.0)
134
+ parser (3.2.2.4)
135
+ ast (~> 2.4.1)
136
+ racc
137
+ power_assert (2.0.3)
138
+ racc (1.7.3)
139
+ rack (2.2.8)
140
+ rack-test (2.1.0)
141
+ rack (>= 1.3)
142
+ rails (7.0.6)
143
+ actioncable (= 7.0.6)
144
+ actionmailbox (= 7.0.6)
145
+ actionmailer (= 7.0.6)
146
+ actionpack (= 7.0.6)
147
+ actiontext (= 7.0.6)
148
+ actionview (= 7.0.6)
149
+ activejob (= 7.0.6)
150
+ activemodel (= 7.0.6)
151
+ activerecord (= 7.0.6)
152
+ activestorage (= 7.0.6)
153
+ activesupport (= 7.0.6)
154
+ bundler (>= 1.15.0)
155
+ railties (= 7.0.6)
156
+ rails-dom-testing (2.2.0)
157
+ activesupport (>= 5.0.0)
158
+ minitest
159
+ nokogiri (>= 1.6)
160
+ rails-html-sanitizer (1.6.0)
161
+ loofah (~> 2.21)
162
+ nokogiri (~> 1.14)
163
+ railties (7.0.6)
164
+ actionpack (= 7.0.6)
165
+ activesupport (= 7.0.6)
166
+ method_source
167
+ rake (>= 12.2)
168
+ thor (~> 1.0)
169
+ zeitwerk (~> 2.5)
170
+ rainbow (2.2.2)
171
+ rake
172
+ rake (13.1.0)
173
+ regexp_parser (2.8.3)
174
+ rexml (3.2.6)
175
+ rspec (3.12.0)
176
+ rspec-core (~> 3.12.0)
177
+ rspec-expectations (~> 3.12.0)
178
+ rspec-mocks (~> 3.12.0)
179
+ rspec-core (3.12.2)
180
+ rspec-support (~> 3.12.0)
181
+ rspec-expectations (3.12.3)
182
+ diff-lcs (>= 1.2.0, < 2.0)
183
+ rspec-support (~> 3.12.0)
184
+ rspec-mocks (3.12.6)
185
+ diff-lcs (>= 1.2.0, < 2.0)
186
+ rspec-support (~> 3.12.0)
187
+ rspec-support (3.12.1)
188
+ rubocop (1.52.1)
189
+ json (~> 2.3)
190
+ parallel (~> 1.10)
191
+ parser (>= 3.2.2.3)
192
+ rainbow (>= 2.2.2, < 4.0)
193
+ regexp_parser (>= 1.8, < 3.0)
194
+ rexml (>= 3.2.5, < 4.0)
195
+ rubocop-ast (>= 1.28.0, < 2.0)
196
+ ruby-progressbar (~> 1.7)
197
+ unicode-display_width (>= 2.4.0, < 3.0)
198
+ rubocop-ast (1.30.0)
199
+ parser (>= 3.2.1.0)
200
+ rubocop-capybara (2.20.0)
201
+ rubocop (~> 1.41)
202
+ rubocop-factory_bot (2.24.0)
203
+ rubocop (~> 1.33)
204
+ rubocop-performance (1.20.1)
205
+ rubocop (>= 1.48.1, < 2.0)
206
+ rubocop-ast (>= 1.30.0, < 2.0)
207
+ rubocop-rails (2.23.1)
208
+ activesupport (>= 4.2.0)
209
+ rack (>= 1.1)
210
+ rubocop (>= 1.33.0, < 2.0)
211
+ rubocop-ast (>= 1.30.0, < 2.0)
212
+ rubocop-rake (0.6.0)
213
+ rubocop (~> 1.0)
214
+ rubocop-rspec (2.25.0)
215
+ rubocop (~> 1.40)
216
+ rubocop-capybara (~> 2.17)
217
+ rubocop-factory_bot (~> 2.22)
218
+ ruby-progressbar (1.13.0)
219
+ rubyzip (2.3.2)
220
+ simplecov (0.15.1)
221
+ docile (~> 1.1.0)
222
+ json (>= 1.8, < 3)
223
+ simplecov-html (~> 0.10.0)
224
+ simplecov-html (0.10.2)
225
+ test-unit (3.1.5)
226
+ power_assert
227
+ thor (1.3.0)
228
+ timeout (0.4.1)
229
+ tomlrb (2.0.3)
230
+ tzinfo (2.0.6)
231
+ concurrent-ruby (~> 1.0)
232
+ unicode-display_width (2.5.0)
233
+ websocket-driver (0.7.6)
234
+ websocket-extensions (>= 0.1.0)
235
+ websocket-extensions (0.1.5)
236
+ with_env (1.1.0)
237
+ xml-simple (1.1.9)
238
+ rexml
239
+ yard (0.9.34)
240
+ zeitwerk (2.6.12)
241
+
242
+ PLATFORMS
243
+ arm64-darwin-22
244
+ arm64-darwin-23
245
+
246
+ DEPENDENCIES
247
+ appraisal (= 2.5.0)
248
+ bundler (~> 2.1)
249
+ data_taster!
250
+ license_finder (~> 7.0)
251
+ nokogiri (< 1.16)
252
+ parser (>= 2.5, != 2.5.1.1)
253
+ rails (= 7.0.6)
254
+ rainbow (= 2.2.2)
255
+ rake (~> 13.0)
256
+ rspec (~> 3.0)
257
+ rubocop-powerhome!
258
+ simplecov (= 0.15.1)
259
+ test-unit (= 3.1.5)
260
+ yard (= 0.9.34)
261
+
262
+ BUNDLED WITH
263
+ 2.4.22
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
module DataTaster
  # Ingests the processed yml file and returns either
  # an empty hash (for cases that are skippable)
  # or a hash that contains:
  #   select:
  #     (whatever is configured to go in the SELECT .. WHERE clause)
  #   sanitize:
  #     the columns and values that need custom sanitization
  class Collection
    # @param table_name [String] table whose entry is looked up in
    #   DataTaster.confection
    def initialize(table_name)
      @table_name = table_name
      @ingredients = DataTaster.confection[table_name]
      @include_insert = DataTaster.config.include_insert
    end

    # Logs the table name and builds the collection for it.
    #
    # @return [Hash] `{}` when the table is skippable, otherwise
    #   `{ select: <SQL String>, sanitize: <configured "sanitize" value or nil> }`
    def assemble
      DataTaster.logger.info("#{table_name}...")

      if skippable?
        DataTaster.logger.info("configured to skip both schema and data")
        {}
      else
        { select: selection, sanitize: sanitization }
      end
    end

    private

    attr_reader :table_name, :ingredients, :include_insert

    # A table is skipped when its name begins with an underscore or when its
    # configuration is the skip marker. `start_with?` anchors at the very
    # start of the name (a `^` regex would also match after a newline) and
    # returns a plain boolean.
    def skippable?
      table_name.to_s.start_with?("_") || ingredients == DataTaster::SKIP_CODE
    end

    # Builds the SELECT statement, prefixed with an INSERT INTO the working
    # database when `include_insert` is set, logs it and returns it.
    def selection
      insert = include_insert ? "INSERT INTO #{working_db}.#{table_name}" : ""

      sql = <<~SQL.squish
        #{insert}
        SELECT * FROM #{source_db}.#{table_name}
        WHERE #{where_clause}
      SQL

      DataTaster.logger.info(sql)
      sql
    end

    # The yml entry is either a bare clause or a hash whose "select" key
    # holds the clause. If no clause is defined, we pass one that selects
    # nothing.
    def where_clause
      clause = ingredients.is_a?(Hash) ? ingredients["select"] : ingredients

      clause || "1 = 0"
    end

    # Only hash-style entries can carry sanitization rules; anything else
    # yields nil.
    def sanitization
      return unless ingredients.is_a?(Hash)

      ingredients["sanitize"]
    end

    def source_db
      @source_db ||= DataTaster.config.source_client.query_options[:database]
    end

    def working_db
      @working_db ||= DataTaster.config.working_client.query_options[:database]
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "erb"
4
+ require "socket"
5
+ require "yaml"
6
+ require "data_taster/flavors"
7
+
8
module DataTaster
  # Ingests the list of data_taster_export_tables.yml files
  # and processes them through an erb template
  # returns a ruby hash of the data
  class Confection
    # Merges every configured yml file, in list order, on top of the
    # default data; later files win on duplicate keys.
    #
    # @return [Hash] merged table configuration
    def assemble
      DataTaster.config.list.each_with_object(default_data) do |path, merged_list|
        merged_list.merge!(load_yml(path.to_s))
      end
    end

    # Renders one yml file through ERB, with DataTaster::Flavors as the
    # superclass of the rendering object so its methods are callable from
    # the template, and parses the result.
    #
    # @param filename [String] path of the yml file
    # @return [Hash] parsed content; `{}` when the file does not exist or
    #   parses to nothing
    def load_yml(filename)
      return {} unless File.exist?(filename)

      erb = ::ERB.new(File.read(filename))
      erb.filename = filename
      flavored_erb = erb.def_class(DataTaster::Flavors, "render()")
      erb_result = flavored_erb.new.render

      # Drop everything up to and including the last "---" marker. A single
      # greedy multiline match does this in one pass, without the
      # per-character alternation of a `(.|\n)*` pattern.
      YAML.safe_load(erb_result.sub(/\A.*---/m, "\n---")) || {}
    end

    private

    # Tables that are always exported in full.
    def default_data
      {
        "schema_migrations" => "1 = 1",
      }
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
module DataTaster
  # Returns SQL for given data, based on types. Used for sanitizing inputs.
  class Detergent
    # A value matching any of these is interpolated into the SQL verbatim
    # (unquoted) instead of being treated as a string literal.
    SANITIZE_FUNCTIONS = [
      /CONCAT/,
      /DATE/,
      /DAY/,
      /FORMAT/,
      /LOWER/,
      /REPLACE/,
      /TRIM/,
      /UCASE/,
      /UPPER/,
    ].freeze

    # Strings containing this pattern are candidates for date parsing.
    DATE_PATTERN = /\d{4}-\d{2}-\d{2}/.freeze

    # @param table_name [String] table to update in the working database
    # @param column_name [String] column to overwrite
    # @param given_value [Object] replacement value as read from the yml
    def initialize(table_name, column_name, given_value)
      @table_name = table_name
      @column_name = column_name
      @value = parse_value(given_value)
    end

    # @return [String, Object] the UPDATE statement for the column, or the
    #   value itself when it is DataTaster::SKIP_CODE
    def deliver
      return value if value == DataTaster::SKIP_CODE

      sql = sql_for(value)

      DataTaster.logger.info("--> #{sql}")
      sql
    end

    private

    attr_reader :table_name, :column_name, :value

    def parse_value(given_value)
      # yml files can't hold custom-set dates, they have to be converted to strings
      return given_value unless given_value.is_a?(String) && given_value.match?(DATE_PATTERN)
      # A SQL function that merely embeds a date literal, e.g.
      # CONCAT('2020-01-31', id), must stay an expression; parsing it would
      # discard everything but the date.
      return given_value if sanitize_function?(given_value)

      Date.parse(given_value)
    end

    # Picks the statement shape from the value's type. Order matters:
    # numerics and SQL functions are checked before blankness.
    def sql_for(value)
      if value.is_a?(Date)
        sql_for_date_value
      elsif value.is_a?(Numeric) || sanitize_function?
        sql_for_uncast_value
      elsif value.blank?
        sql_for_nil_value
      else
        sql_for_cast_value
      end
    end

    def sanitize_function?(candidate = value)
      text = candidate.to_s
      SANITIZE_FUNCTIONS.any? { |fun| text.match?(fun) }
    end

    # Value is emitted unquoted; rows already holding it are left alone.
    def sql_for_uncast_value
      <<~SQL.squish
        UPDATE #{working_db}.#{table_name}
        SET #{column_name} = #{value}
        WHERE #{column_name} IS NOT NULL
        AND #{column_name} <> #{value}
      SQL
    end

    def sql_for_date_value
      <<~SQL.squish
        UPDATE #{working_db}.#{table_name}
        SET #{column_name} = '#{value}'
        WHERE #{column_name} IS NOT NULL
      SQL
    end

    def sql_for_nil_value
      <<~SQL.squish
        UPDATE #{working_db}.#{table_name}
        SET #{column_name} = NULL
        WHERE #{column_name} IS NOT NULL
      SQL
    end

    # Value is emitted as a quoted string; empty strings are left alone.
    def sql_for_cast_value
      <<~SQL.squish
        UPDATE #{working_db}.#{table_name}
        SET #{column_name} = '#{value}'
        WHERE #{column_name} IS NOT NULL
        AND #{column_name} <> ''
      SQL
    end

    def working_db
      @working_db ||= DataTaster.config.working_client.query_options[:database]
    end
  end
end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
module DataTaster
  # helper methods made to make data_taster_export_tables.yml
  # files more user-friendly
  class Flavors
    include DataTaster::Helper

    def current_date
      @current_date ||= Date.current
    end

    # Cut-off timestamp used by the "recent" helpers: the start of the day
    # `DataTaster.config.months` months ago when that is configured,
    # otherwise one week ago.
    #
    # `to_formatted_s(:db)` replaces `to_s(:db)`: the format-argument form
    # of `to_s` is deprecated in Rails 7.0 and removed in 7.1.
    #
    # @return [String] timestamp in the :db format
    def date
      @date ||= begin
        months = DataTaster.config.months
        lookback = months ? months.to_i.months : 1.week

        (current_date - lookback).beginning_of_day.to_formatted_s(:db)
      end
    end

    # skips dumping both schema and data
    def deprecated_table
      DataTaster::SKIP_CODE
    end

    # Marks a column as exempt from sanitization.
    def skip_sanitization
      DataTaster::SKIP_CODE
    end

    # Encrypts `value` (or a column-appropriate default when none is given)
    # by calling `encrypt(column, value)` on a new instance of `klass`.
    #
    # @param klass [Class] class whose instances respond to `encrypt(column, value)`
    # @param column [String, Symbol] column name; also drives the default value
    # @param value [Object, nil] explicit value to encrypt
    def encrypt(klass, column, value = nil)
      value_to_encrypt = value || default_value_for(column)

      klass.new.encrypt(column, value_to_encrypt)
    end

    # Placeholder value chosen by pattern-matching the column name.
    def default_value_for(column)
      case column
      when /date_of_birth/, /dob/
        (Date.current - 25.years).strftime("%m/%d/%Y")
      when /ssn/, /license/
        "111111111"
      when /compensation/
        1
      else
        "1"
      end
    end

    # WHERE clause that selects every row.
    def full_table_dump
      "1 = 1"
    end

    # WHERE clause that selects rows created or updated since `date`.
    def recent_table_updates
      "created_at >= '#{date}' OR updated_at >= '#{date}'"
    end

    # Subquery returning the distinct `col_name` values of rows in
    # `table_name` (in the source database) created or updated since `date`.
    def recent_ids(table_name, col_name)
      <<~SQL.squish
        (SELECT DISTINCT(#{col_name})
        FROM #{source_db}.#{table_name}
        WHERE
          created_at >= '#{date}'
        OR
          updated_at >= '#{date}')
      SQL
    end

    def source_db
      @source_db ||= db_config["database"]
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
module DataTaster
  # helpers used globally in DataTaster
  module Helper
    # Masks credentials in a shell command so it can be logged safely.
    #
    # Replaces the shell-escaped DEV_DUMP_USER with "<username>", and the
    # shell-escaped and raw DEV_DUMP_PASSWORD plus the shell-escaped
    # `params["password"]` with "<pwd>". Secrets that are unset or empty are
    # skipped: an unset password would otherwise be passed to `gsub` as nil
    # (TypeError), an unset user escapes to `''` and would mask every empty
    # quoted argument, and an empty pattern matches between every character.
    #
    # @param command [String] command line to mask
    # @param params [Hash, nil] optional connection params; only
    #   `params["password"]` is read
    # @return [String] a masked copy of `command`
    def sanitize_command(command, params = nil)
      user = ENV.fetch("DEV_DUMP_USER", nil).to_s
      password = ENV.fetch("DEV_DUMP_PASSWORD", nil).to_s
      params_password = params ? params["password"].to_s : ""

      masks = []
      masks << [Shellwords.escape(user), "<username>"] unless user.empty?
      unless password.empty?
        # Escaped form first, so a password containing shell metacharacters
        # is masked whole before the raw form is looked for.
        masks << [Shellwords.escape(password), "<pwd>"]
        masks << [password, "<pwd>"]
      end
      masks << [Shellwords.escape(params_password), "<pwd>"] unless params_password.empty?

      masks.reduce(command.dup) { |masked, (secret, placeholder)| masked.gsub(secret, placeholder) }
    end

    # Parsed config/database.yml of the host Rails app, rendered through ERB
    # first; YAML aliases are permitted.
    def db_yml
      @db_yml ||= YAML.safe_load(ERB.new(Rails.root.join("config", "database.yml").read).result, aliases: true)
    end

    # The database.yml section for the current Rails environment.
    def db_config
      @db_config ||= db_yml[Rails.env]
    end

    # Logs `message` at debug level, prefixed with the current time.
    def logg(message)
      DataTaster.logger.debug { "[#{Time.current}] #{message}" }
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module DataTaster
  # Selects and processes tables from the source_db
  # to insert (or query) into the working_db
  class Sample
    # Assembles the table's collection up front.
    #
    # @param table_name [String] table to process
    def initialize(table_name)
      @table_name = table_name
      @include_insert = DataTaster.config.include_insert
      @collection = DataTaster::Collection.new(
        table_name
      ).assemble
    end

    # Drops the table when it produced no SQL and inserts are enabled;
    # otherwise truncates it, runs the select statement and sanitizes the
    # result.
    def serve!
      # Any table that does not return SQL is considered deprecated and we should fully skip it
      if collection.empty? && include_insert
        # Qualified with the working database, like the TRUNCATE below, so
        # the statement does not depend on the connection's default schema.
        DataTaster.safe_execute("DROP TABLE IF EXISTS #{working_db}.#{table_name}")
      else
        ensure_empty_table
        process_select(collection[:select])
        DataTaster::Sanitizer.new(table_name, collection[:sanitize]).clean!
      end
    end

    private

    attr_reader :table_name, :include_insert, :collection

    def ensure_empty_table
      DataTaster.safe_execute("TRUNCATE TABLE #{working_db}.#{table_name}")
    end

    # Executes the statement; on failure re-raises the same class of error
    # with the table name and SQL appended to the message.
    #
    # `Exception#exception(message)` returns a copy carrying the new message,
    # so this works even when the original message string is frozen
    # (appending to it in place would raise FrozenError and hide the real
    # failure).
    def process_select(sql)
      DataTaster.safe_execute(sql)
    rescue => e
      raise e.exception("#{e.message} executing SQL statement for #{table_name}: #{sql}")
    end

    def working_db
      @working_db ||= DataTaster.config.working_client.query_options[:database]
    end
  end
end