bigquery_migration 0.1.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,60 @@
+ require_relative 'config_loader'
+ require_relative 'error'
+ require_relative 'action'
+ require_relative 'hash_util'
+
+ class BigqueryMigration
+   class ActionRunner
+     attr_reader :config, :config_path, :opts
+
+     def initialize(config_path = nil, opts = {})
+       @config_path = config_path
+       @opts = opts
+       config = ConfigLoader.new(@config_path, opts[:vars]).load
+       @config = HashUtil.deep_symbolize_keys(config)
+       validate_config!
+     end
+
+     def run
+       success, responses = run_actions
+       { success: success, dry_run: @opts[:dry_run], actions: responses }
+     end
+
+     def run_actions
+       success = true
+       responses = []
+
+       @config[:actions].each do |action_config|
+         _success, result = Action.new(action_config, @opts).run
+         response = action_config.merge({'result' => result})
+         responses << response
+         unless _success
+           success = false
+           break
+         end
+       end
+
+       [success, responses]
+     end
+
+     def validate_config!
+       unless config.is_a?(Hash)
+         raise ConfigError, "config file format has to be YAML Hash"
+       end
+
+       unless config[:actions]
+         raise ConfigError, "config must have `actions` key"
+       end
+
+       unless config[:actions].is_a?(Array)
+         raise ConfigError, "config[:actions] must be an Array"
+       end
+
+       config[:actions].each do |action_config|
+         unless action_config[:action]
+           raise ConfigError, "Elements of `config[:actions]` must have `action` key"
+         end
+       end
+     end
+   end
+ end
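
validate_config! above fixes the config contract: a YAML Hash with an `actions` Array whose elements each carry an `action` key. A minimal, hedged usage sketch follows; the file name, the `migrate_table` action, and the per-action keys are illustrative assumptions, since the `Action` class that dispatches them is not part of this diff.

require 'yaml'
require 'bigquery_migration'  # assumed entry point; adjust to your load path

# Hypothetical config: one element per action, each with an `action` key.
File.write('config.yml', <<~YAML)
  actions:
    - action: migrate_table                  # dispatched by Action (not shown in this diff)
      json_keyfile: /path/to/keyfile.json    # assumed per-action BigqueryWrapper config
      dataset: my_dataset
      table: my_table
      schema_file: /path/to/schema.json
YAML

runner = BigqueryMigration::ActionRunner.new('config.yml', dry_run: true)
result = runner.run
# => { success: true/false, dry_run: true, actions: [...] }
puts result[:success]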
@@ -0,0 +1,675 @@
+ require 'csv'
+ require 'json'
+ require_relative 'schema'
+ require_relative 'error'
+ require_relative 'time_with_zone'
+ require_relative 'hash_util'
+ require 'google/apis/bigquery_v2'
+ require 'google/api_client/auth/key_utils'
+
+ class BigqueryMigration
+   class BigqueryWrapper
+     attr_reader :config
+
+     def logger
+       BigqueryMigration.logger
+     end
+
+     def initialize(config, opts = {})
+       @config = HashUtil.deep_symbolize_keys(config)
+       @opts = HashUtil.deep_symbolize_keys(opts)
+       configure
+     end
+
+     def configure
+       if json_keyfile = config[:json_keyfile]
+         config[:json_key] =
+           case json_keyfile
+           when String
+             File.read(json_keyfile)
+           when Hash
+             json_keyfile[:content]
+           else
+             raise ConfigError.new "Unsupported json_keyfile type"
+           end
+       else
+         config[:json_key] = {
+           project_id: config[:project_id],
+           service_email: config[:service_email],
+           private_key: config[:private_key],
+         }.to_json
+       end
+
+       if config[:json_key]
+         begin
+           jsonkey_params = JSON.parse(config[:json_key])
+         rescue => e
+           raise ConfigError.new "json_keyfile is not a JSON file"
+         end
+       end
+
+       if jsonkey_params
+         config[:project] ||= jsonkey_params['project_id']
+       end
+
+       config[:retries] ||= 5
+       config[:timeout_sec] ||= 300
+       config[:open_timeout_sec] ||= 300
+     end
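
As configure above shows, credentials come either from `json_keyfile` (a path String, or a Hash carrying the JSON content inline) or from discrete `project_id`/`service_email`/`private_key` keys that are re-serialized to JSON. A hedged sketch of the accepted shapes; all values are placeholders.

# Placeholder configs illustrating the branches of #configure above.
config_from_path = {
  json_keyfile: '/path/to/service_account.json',   # String branch: read from disk
  dataset: 'my_dataset',
  table: 'my_table',
}

config_inline = {
  json_keyfile: { content: '{"project_id":"my-project"}' },  # Hash branch: content given inline
  dataset: 'my_dataset',
  table: 'my_table',
}

config_discrete = {
  project_id: 'my-project',
  service_email: 'svc@my-project.iam.gserviceaccount.com',
  private_key: '-----BEGIN PRIVATE KEY-----...',
  dataset: 'my_dataset',
  table: 'my_table',
}

# Any of these could then back a wrapper, e.g. (illustrative):
# BigqueryMigration::BigqueryWrapper.new(config_inline, dry_run: true)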
+
+     def project
+       @project ||= config[:project] || raise(ConfigError, '`project` is required.')
+     end
+
+     def dataset
+       @dataset ||= config[:dataset] || raise(ConfigError, '`dataset` is required.')
+     end
+
+     def table
+       @table ||= config[:table] || raise(ConfigError, '`table` is required.')
+     end
+
+     def job_status_polling_interval
+       @job_status_polling_interval ||= config[:job_status_polling_interval] || 5
+     end
+
+     def job_status_max_polling_time
+       @job_status_max_polling_time ||= config[:job_status_polling_time] || 3600
+     end
+
+     def dry_run?
+       @opts[:dry_run]
+     end
+
+     def head
+       dry_run? ? '(DRY-RUN) ' : '(EXECUTE) '
+     end
+
+     def client
+       return @cached_client if @cached_client && @cached_client_expiration > Time.now
+
+       client = Google::Apis::BigqueryV2::BigqueryService.new
+       client.request_options.retries = config[:retries]
+       client.request_options.timeout_sec = config[:timeout_sec]
+       client.request_options.open_timeout_sec = config[:open_timeout_sec]
+       logger.debug { "client_options: #{client.client_options.to_h}" }
+       logger.debug { "request_options: #{client.request_options.to_h}" }
+
+       scope = "https://www.googleapis.com/auth/bigquery"
+
+       key = StringIO.new(config[:json_key])
+       auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
+       client.authorization = auth
+
+       @cached_client_expiration = Time.now + 1800
+       @cached_client = client
+     end
+
+     def existing_columns
+       begin
+         result = get_table
+         response = result[:responses][:get_table]
+         response.schema.fields.map {|column| column.to_h }
+       rescue NotFoundError
+         return []
+       end
+     end
+
+     def get_dataset(dataset: nil)
+       dataset ||= self.dataset
+       begin
+         logger.info { "Get dataset... #{project}:#{dataset}" }
+         response = client.get_dataset(project, dataset)
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404
+           raise NotFoundError, "Dataset #{project}:#{dataset} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         raise Error, "Failed to get_dataset(#{project}, #{dataset}), response:#{response}"
+       end
+
+       { responses: { get_dataset: response } }
+     end
+
+     def insert_dataset(dataset: nil, reference: nil)
+       dataset ||= self.dataset
+       begin
+         logger.info { "#{head}Insert (create) dataset... #{project}:#{dataset}" }
+         hint = {}
+         if reference
+           response = get_dataset(dataset: reference)
+           hint = { access: response[:responses][:get_dataset].access }
+         end
+         body = {
+           dataset_reference: {
+             project_id: project,
+             dataset_id: dataset,
+           },
+         }.merge(hint)
+         opts = {}
+         logger.debug { "#{head}insert_dataset(#{project}, #{body}, #{opts})" }
+         unless dry_run?
+           response = client.insert_dataset(project, body, opts)
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 409 && /Already Exists:/ =~ e.message
+           # ignore 'Already Exists' error
+           return {}
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         raise Error, "Failed to insert_dataset(#{project}, #{body}, #{opts}), response:#{response}"
+       end
+
+       { responses: { insert_dataset: response } }
+     end
+     alias :create_dataset :insert_dataset
+
+     def get_table(dataset: nil, table: nil)
+       dataset ||= self.dataset
+       table ||= self.table
+       begin
+         logger.debug { "Get table... #{project}:#{dataset}.#{table}" }
+         response = client.get_table(project, dataset, table)
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 # not found
+           raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         raise Error, "Failed to get_table(#{project}, #{dataset}, #{table}), response:#{response}"
+       end
+
+       { responses: { get_table: response } }
+     end
+
+     def insert_table(dataset: nil, table: nil, columns: )
+       dataset ||= self.dataset
+       table ||= self.table
+       schema = Schema.new(columns)
+
+       begin
+         logger.info { "#{head}Insert (create) table... #{project}:#{dataset}.#{table}" }
+         body = {
+           table_reference: {
+             table_id: table,
+           },
+           schema: {
+             fields: schema,
+           }
+         }
+         opts = {}
+         logger.debug { "#{head}insert_table(#{project}, #{dataset}, #{body}, #{opts})" }
+         unless dry_run?
+           response = client.insert_table(project, dataset, body, opts)
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 409 && /Already Exists:/ =~ e.message
+           # ignore 'Already Exists' error
+           return {}
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         raise Error, "Failed to insert_table(#{project}, #{dataset}, #{body}, #{opts}), response:#{response}"
+       end
+
+       { responses: { insert_table: response } }
+     end
+     alias :create_table :insert_table
+
+     def delete_table(dataset: nil, table: nil)
+       dataset ||= self.dataset
+       table ||= self.table
+
+       begin
+         logger.info { "#{head}Delete (drop) table... #{project}:#{dataset}.#{table}" }
+         unless dry_run?
+           client.delete_table(project, dataset, table) # no response
+           success = true
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 && /Not found:/ =~ e.message
+           # ignore 'Not Found' error
+           return {}
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         raise Error, "Failed to delete_table(#{project}, #{dataset}, #{table}), response:#{response}"
+       end
+
+       { success: success }
+     end
+     alias :drop_table :delete_table
+
+     def list_tables(dataset: nil, max_results: 999999)
+       dataset ||= self.dataset
+
+       tables = []
+       begin
+         logger.info { "List tables... #{project}:#{dataset}" }
+         response = client.list_tables(project, dataset, max_results: max_results)
+         while true
+           _tables = (response.tables || []).map { |t| t.table_reference.table_id.to_s }
+           tables.concat(_tables)
+           if next_page_token = response.next_page_token
+             response = client.list_tables(project, dataset, page_token: next_page_token, max_results: max_results)
+           else
+             break
+           end
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 && /Not found:/ =~ e.message
+           raise NotFoundError, "Dataset #{project}:#{dataset} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         logger.error { "list_tables(#{project}, #{dataset}), response:#{response}" }
+         raise Error, "failed to list tables #{project}:#{dataset}, response:#{response}"
+       end
+
+       { tables: tables }
+     end
+
+     def purge_tables(dataset: nil, table_prefix: , suffix_format: , purge_before: , timezone: nil)
+       dataset ||= self.dataset
+       timezone ||= Time.now.strftime('%z')
+
+       before_tables = list_tables[:tables]
+
+       purge_before_t = TimeWithZone.strptime_with_zone(purge_before, suffix_format, timezone)
+       tables = before_tables.select do |tbl|
+         suffix = tbl.gsub(table_prefix, '')
+         begin
+           suffix_t = TimeWithZone.strptime_with_zone(suffix, suffix_format, timezone)
+         rescue
+           next
+         end
+         # skip if different from the suffix_format
+         next if suffix_t.strftime(suffix_format) != suffix
+         suffix_t <= purge_before_t
+       end
+
+       tables.each do |_table|
+         delete_table(table: _table)
+         # If you make more than 100 requests per second, throttling might occur.
+         # See https://cloud.google.com/bigquery/quota-policy#apirequests
+         sleep 1
+       end
+
+       { delete_tables: tables }
+     end
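
purge_tables above strips table_prefix from each table name, parses the remainder with suffix_format, and drops every table whose suffix sorts at or before purge_before, sleeping between deletes to stay under the API quota. A hedged example call; the prefix, format, and dates are illustrative placeholders.

# Illustrative: with tables named like access_log_20160301, this drops every
# date-suffixed table up to and including 2016-03-31.
config = { json_keyfile: '/path/to/service_account.json', dataset: 'my_dataset' }  # placeholder
wrapper = BigqueryMigration::BigqueryWrapper.new(config)
wrapper.purge_tables(
  table_prefix:  'access_log_',
  suffix_format: '%Y%m%d',
  purge_before:  '20160331',
  timezone:      '+09:00'
)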
+
+     # rows:
+     #   - id: 1
+     #     type: one
+     #     record:
+     #       child1: 'child1'
+     #       child2: 'child2'
+     #   - id: 2
+     #     type: two
+     #     record:
+     #       child1: 'child3'
+     #       child2: 'child4'
+     def insert_all_table_data(dataset: nil, table: nil, rows: )
+       dataset ||= self.dataset
+       table ||= self.table
+
+       begin
+         logger.info { "#{head}insertAll tableData... #{project}:#{dataset}.#{table}" }
+         body = {
+           rows: rows.map {|row| { json: row } },
+         }
+         opts = {}
+         unless dry_run?
+           response = client.insert_all_table_data(project, dataset, table, body, opts)
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 # not found
+           raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         logger.error {
+           "insert_all_table_data(#{project}, #{dataset}, #{table}, #{opts}), response:#{response}"
+         }
+         raise Error, "failed to insert_all table_data #{project}:#{dataset}.#{table}, response:#{response}"
+       end
+
+       { responses: { insert_all_table_data: response } }
+     end
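
The comment above insert_all_table_data describes the expected `rows` shape; the equivalent Ruby literal and a hedged call would look like this (the wrapper instance is assumed to be configured as sketched earlier).

# The same rows as the YAML-style comment above, as Ruby hashes.
rows = [
  { id: 1, type: 'one', record: { child1: 'child1', child2: 'child2' } },
  { id: 2, type: 'two', record: { child1: 'child3', child2: 'child4' } },
]
# wrapper.insert_all_table_data(rows: rows)  # streams into the configured dataset.table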
+
+     # @return Hash result of list table_data
+     #
+     # Example:
+     # {
+     #   columns:
+     #     [
+     #       {
+     #         name: id,
+     #         type: INTEGER
+     #       },
+     #       {
+     #         name: type,
+     #         type: STRING
+     #       },
+     #       {
+     #         name: record.child1,
+     #         type: STRING
+     #       },
+     #       {
+     #         name: record.child2,
+     #         type: STRING
+     #       },
+     #   values:
+     #     [
+     #       [2,"two","child3","child4"],
+     #       [1,"one","child1","child2"]
+     #     ],
+     #   total_rows: 2
+     # }
+     def list_table_data(dataset: nil, table: nil, max_results: 100)
+       dataset ||= self.dataset
+       table ||= self.table
+
+       begin
+         logger.info { "list_table_data(#{project}, #{dataset}, #{table}, max_results: #{max_results})" }
+         response = client.list_table_data(project, dataset, table, max_results: max_results)
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 # not found
+           raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         logger.error { "list_table_data(#{project}, #{dataset}, #{table}, max_results: #{max_results})" }
+         raise Error, "Failed to list table_data #{project}:#{dataset}.#{table}, response:#{response}"
+       end
+
+       flattened_columns = Schema.new(existing_columns).flattened_columns.map do |name, column|
+         {name: name}.merge!(column)
+       end
+       if rows = response.to_h[:rows]
+         flattened_values = flatten_values(rows)
+       end
+
+       {
+         total_rows: response.total_rows,
+         columns: flattened_columns,
+         values: flattened_values,
+         response: {
+           list_table_data: response,
+         }
+       }
+     end
+
+     private def flatten_values(rows)
+       rows.map do |r|
+         if r.key?(:f)
+           r[:f].map do |f|
+             if f[:v].respond_to?(:key?) && f[:v].key?(:f)
+               flatten_values(f[:v][:f])
+             else
+               f[:v]
+             end
+           end.flatten
+         else
+           r[:v]
+         end
+       end
+     end
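
flatten_values above is what turns the nested `f`/`v` structure returned by tabledata.list into the flat `values` arrays shown in the comment before list_table_data. A hedged, hand-built illustration of that transformation (BigQuery returns cell values as strings):

# One row with an INTEGER, a STRING, and a RECORD of two children, in the
# symbolized tabledata.list shape that flatten_values expects.
row = {
  f: [
    { v: '1' },
    { v: 'one' },
    { v: { f: [{ v: 'child1' }, { v: 'child2' }] } },
  ]
}
# flatten_values([row])  #=> [["1", "one", "child1", "child2"]]
# (private helper; shown only to illustrate the flattening)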
+
+     def patch_table(dataset: nil, table: nil, columns: nil, add_columns: nil)
+       dataset ||= self.dataset
+       table ||= self.table
+
+       if columns.nil? and add_columns.nil?
+         raise ArgumentError, 'patch_table: `columns` or `add_columns` is required'
+       end
+
+       before_columns = existing_columns
+       if columns # if already given
+         schema = Schema.new(columns)
+       else
+         schema = Schema.new(add_columns)
+         schema.reverse_merge!(before_columns)
+       end
+       schema.validate_permitted_operations!(before_columns)
+
+       begin
+         logger.info { "#{head}Patch table... #{project}:#{dataset}.#{table}" }
+         fields = schema.map {|column| HashUtil.deep_symbolize_keys(column) }
+         body = {
+           schema: {
+             fields: fields,
+           }
+         }
+         opts = {}
+         logger.debug { "#{head}patch_table(#{project}, #{dataset}, #{table}, #{body}, options: #{opts})" }
+         unless dry_run?
+           response = client.patch_table(project, dataset, table, body, options: opts)
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 # not found
+           raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         logger.error {
+           "patch_table(#{project}, #{dataset}, #{table}, #{body}, options: #{opts}), response:#{response}"
+         }
+         raise Error, "Failed to patch table #{project}:#{dataset}.#{table}, response:#{response}"
+       end
+
+       after_columns = existing_columns
+
+       {
+         before_columns: before_columns,
+         after_columns: after_columns,
+         responses: { patch_table: response },
+       }
+     end
+     alias :add_column :patch_table
+
+     def copy_table(destination_table:, destination_dataset: nil, source_table: nil, source_dataset: nil, write_disposition: 'WRITE_TRUNCATE')
+       source_table ||= self.table
+       source_dataset ||= self.dataset
+       destination_dataset ||= source_dataset
+
+       body = {
+         configuration: {
+           copy: {
+             create_disposition: 'CREATE_IF_NEEDED',
+             write_disposition: write_disposition,
+             source_table: {
+               project_id: project,
+               dataset_id: source_dataset,
+               table_id: source_table,
+             },
+             destination_table: {
+               project_id: project,
+               dataset_id: destination_dataset,
+               table_id: destination_table,
+             },
+           }
+         }
+       }
+       opts = {}
+
+       logger.info { "#{head}insert_job(#{project}, #{body}, #{opts})" }
+       unless dry_run?
+         response = client.insert_job(project, body, opts)
+         get_response = wait_load('copy', response)
+       end
+
+       {
+         responses: {
+           insert_job: response,
+           last_get_job: get_response,
+         }
+       }
+     end
+
+     def insert_select(query:, destination_table: nil, destination_dataset: nil, write_disposition: 'WRITE_TRUNCATE')
+       destination_table ||= self.table
+       destination_dataset ||= self.dataset
+
+       body = {
+         configuration: {
+           query: {
+             allow_large_results: true,
+             flatten_results: false,
+             write_disposition: write_disposition,
+             query: query,
+             destination_table: {
+               project_id: self.project,
+               dataset_id: destination_dataset,
+               table_id: destination_table,
+             },
+           }
+         }
+       }
+       opts = {}
+
+       logger.info { "#{head}insert_job(#{project}, #{body}, #{opts})" }
+       unless dry_run?
+         response = client.insert_job(project, body, opts)
+         get_response = wait_load('query', response)
+       end
+
+       {
+         responses: {
+           insert_job: response,
+           last_get_job: get_response,
+         }
+       }
+     end
+
+     private def wait_load(kind, response)
+       started = Time.now
+
+       wait_interval = self.job_status_polling_interval
+       max_polling_time = self.job_status_max_polling_time
+       _response = response
+
+       while true
+         job_id = _response.job_reference.job_id
+         elapsed = Time.now - started
+         status = _response.status.state
+         if status == "DONE"
+           logger.info {
+             "#{kind} job completed... " \
+             "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
+           }
+           break
+         elsif elapsed.to_i > max_polling_time
+           message = "Checking #{kind} job status... " \
+             "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[TIMEOUT]"
+           logger.info { message }
+           raise JobTimeoutError.new(message)
+         else
+           logger.info {
+             "Checking #{kind} job status... " \
+             "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
+           }
+           sleep wait_interval
+           _response = client.get_job(project, job_id)
+         end
+       end
+
+       # cf. http://www.rubydoc.info/github/google/google-api-ruby-client/Google/Apis/BigqueryV2/JobStatus#errors-instance_method
+       # `errors` returns Array<Google::Apis::BigqueryV2::ErrorProto> if any error exists.
+       # Otherwise, this returns nil.
+       if _errors = _response.status.errors
+         raise Error, "Failed while waiting for a job, get_job(#{project}, #{job_id}), errors:#{_errors.map(&:to_h)}"
+       end
+
+       _response
+     end
+
+     def drop_column(table: nil, columns: nil, drop_columns: nil, backup_dataset: nil, backup_table: nil)
+       table ||= self.table
+       backup_dataset ||= self.dataset
+       if columns.nil? and drop_columns.nil?
+         raise ArgumentError, '`drop_columns` or `columns` is required'
+       end
+
+       result = { responses: {} }
+
+       before_columns = existing_columns
+
+       if columns # if already given
+         schema = Schema.new(columns)
+       else
+         schema = Schema.new(existing_columns)
+         schema.reject_columns!(drop_columns)
+       end
+       if schema.empty? && !dry_run?
+         raise Error, 'No columns remain'
+       end
+
+       schema.validate_permitted_operations!(before_columns)
+
+       unless backup_dataset == self.dataset
+         create_dataset(dataset: backup_dataset)
+       end
+
+       if backup_table
+         _result = copy_table(source_table: table, destination_table: backup_table, destination_dataset: backup_dataset)
+         result[:responses].merge!(_result[:responses])
+       end
+
+       unless (add_columns = schema.diff_columns_by_name(before_columns)).empty?
+         _result = patch_table(add_columns: add_columns)
+         result[:responses].merge!(_result[:responses])
+       end
+
+       query_fields = schema.build_query_fields(before_columns)
+       query = "SELECT #{query_fields.join(',')} FROM [#{dataset}.#{table}]"
+       _result = insert_select(query: query, destination_table: table)
+       result[:responses].merge!(_result[:responses])
+
+       after_columns = existing_columns
+
+       result.merge!({before_columns: before_columns, after_columns: after_columns})
+     end
+
+     def migrate_table(table: nil, schema_file: nil, columns: nil, backup_dataset: nil, backup_table: nil)
+       table ||= self.table
+       backup_dataset ||= self.dataset
+
+       if schema_file.nil? and columns.nil?
+         raise ArgumentError, '`schema_file` or `columns` is required'
+       end
+       if schema_file
+         columns = HashUtil.deep_symbolize_keys(JSON.parse(File.read(schema_file)))
+       end
+       Schema.validate_columns!(columns)
+
+       before_columns = existing_columns
+
+       result = {}
+       if before_columns.empty?
+         result = create_table(table: table, columns: columns)
+       else
+         add_columns = Schema.diff_columns(before_columns, columns)
+         drop_columns = Schema.diff_columns(columns, before_columns)
+
+         if !drop_columns.empty?
+           drop_column(table: table, columns: columns,
+                       backup_dataset: backup_dataset, backup_table: backup_table)
+         elsif !add_columns.empty?
+           add_column(table: table, columns: columns)
+         end
+       end
+
+       after_columns = existing_columns
+
+       if after_columns.empty? and !dry_run?
+         raise Error, "after_columns is empty. " \
+           "before_columns: #{before_columns}, after_columns: #{after_columns}, columns: #{columns}"
+       end
+
+       result.merge!( before_columns: before_columns, after_columns: after_columns )
+     end
+   end
+ end
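
Taken together, migrate_table compares the live schema with a target schema and routes to create_table, add_column, or drop_column as needed. A hedged end-to-end sketch; the schema file, its field list, and the backup table name are placeholders, and the standard BigQuery `fields` JSON format is assumed.

require 'json'

# Placeholder target schema in BigQuery `fields` format.
File.write('schema.json', JSON.pretty_generate([
  { name: 'id',   type: 'INTEGER' },
  { name: 'type', type: 'STRING'  },
]))

# wrapper = BigqueryMigration::BigqueryWrapper.new(config, dry_run: true)
# wrapper.migrate_table(schema_file: 'schema.json', backup_table: 'my_table_backup')
# => { ..., before_columns: [...], after_columns: [...] }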