bigquery_migration 0.1.0.pre1

@@ -0,0 +1,60 @@
+ require_relative 'config_loader'
+ require_relative 'error'
+ require_relative 'action'
+ require_relative 'hash_util'
+
+ class BigqueryMigration
+   class ActionRunner
+     attr_reader :config, :config_path, :opts
+
+     def initialize(config_path = nil, opts = {})
+       @config_path = config_path
+       @opts = opts
+       config = ConfigLoader.new(@config_path, opts[:vars]).load
+       @config = HashUtil.deep_symbolize_keys(config)
+       validate_config!
+     end
+
+     def run
+       success, responses = run_actions
+       { success: success, dry_run: @opts[:dry_run], actions: responses }
+     end
+
+     def run_actions
+       success = true
+       responses = []
+
+       @config[:actions].each do |action_config|
+         _success, result = Action.new(action_config, @opts).run
+         response = action_config.merge({'result' => result})
+         responses << response
+         unless _success
+           success = false
+           break
+         end
+       end
+
+       [success, responses]
+     end
+
+     def validate_config!
+       unless config.is_a?(Hash)
+         raise ConfigError, "config file format has to be YAML Hash"
+       end
+
+       unless config[:actions]
+         raise ConfigError, "config must have `actions` key"
+       end
+
+       unless config[:actions].is_a?(Array)
+         raise ConfigError, "config[:actions] must be an Array"
+       end
+
+       config[:actions].each do |action_config|
+         unless action_config[:action]
+           raise ConfigError, "Elements of `config[:actions]` must have `action` key"
+         end
+       end
+     end
+   end
+ end
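
The ActionRunner above loads a YAML config, validates that it contains an `actions` array, and runs each action in order, stopping at the first failure. A minimal sketch of driving it, assuming the gem's conventional `lib/bigquery_migration/` layout; the config path, action name, and option values are illustrative, not taken from the gem's docs:

    require 'bigquery_migration/action_runner'

    # Hypothetical config.yml in the shape validate_config! expects:
    #
    #   actions:
    #     - action: migrate_table
    #       table: my_table
    #
    runner = BigqueryMigration::ActionRunner.new('config.yml', dry_run: true, vars: {})
    result = runner.run
    result[:success]  # => false as soon as any single action fails
    result[:actions]  # => each action_config merged with its 'result'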
@@ -0,0 +1,675 @@
+ require 'csv'
+ require 'json'
+ require_relative 'schema'
+ require_relative 'error'
+ require_relative 'time_with_zone'
+ require_relative 'hash_util'
+ require 'google/apis/bigquery_v2'
+ require 'google/api_client/auth/key_utils'
+
+ class BigqueryMigration
+   class BigqueryWrapper
+     attr_reader :config
+
+     def logger
+       BigqueryMigration.logger
+     end
+
+     def initialize(config, opts = {})
+       @config = HashUtil.deep_symbolize_keys(config)
+       @opts = HashUtil.deep_symbolize_keys(opts)
+       configure
+     end
+
+     def configure
+       if json_keyfile = config[:json_keyfile]
+         config[:json_key] =
+           case json_keyfile
+           when String
+             File.read(json_keyfile)
+           when Hash
+             json_keyfile[:content]
+           else
+             raise ConfigError.new "Unsupported json_keyfile type"
+           end
+       else
+         config[:json_key] = {
+           project_id: config[:project_id],
+           service_email: config[:service_email],
+           private_key: config[:private_key],
+         }.to_json
+       end
+
+       if config[:json_key]
+         begin
+           jsonkey_params = JSON.parse(config[:json_key])
+         rescue => e
+           raise ConfigError.new "json_keyfile is not a JSON file"
+         end
+       end
+
+       if jsonkey_params
+         config[:project] ||= jsonkey_params['project_id']
+       end
+
+       config[:retries] ||= 5
+       config[:timeout_sec] ||= 300
+       config[:open_timeout_sec] ||= 300
+     end
+
+     def project
+       @project ||= config[:project] || raise(ConfigError, '`project` is required.')
+     end
+
+     def dataset
+       @dataset ||= config[:dataset] || raise(ConfigError, '`dataset` is required.')
+     end
+
+     def table
+       @table ||= config[:table] || raise(ConfigError, '`table` is required.')
+     end
+
+     def job_status_polling_interval
+       @job_status_polling_interval ||= config[:job_status_polling_interval] || 5
+     end
+
+     def job_status_max_polling_time
+       @job_status_max_polling_time ||= config[:job_status_polling_time] || 3600
+     end
+
+     def dry_run?
+       @opts[:dry_run]
+     end
+
+     def head
+       dry_run? ? '(DRY-RUN) ' : '(EXECUTE) '
+     end
+
+     def client
+       return @cached_client if @cached_client && @cached_client_expiration > Time.now
+
+       client = Google::Apis::BigqueryV2::BigqueryService.new
+       client.request_options.retries = config[:retries]
+       client.request_options.timeout_sec = config[:timeout_sec]
+       client.request_options.open_timeout_sec = config[:open_timeout_sec]
+       logger.debug { "client_options: #{client.client_options.to_h}" }
+       logger.debug { "request_options: #{client.request_options.to_h}" }
+
+       scope = "https://www.googleapis.com/auth/bigquery"
+
+       key = StringIO.new(config[:json_key])
+       auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
+       client.authorization = auth
+
+       @cached_client_expiration = Time.now + 1800
+       @cached_client = client
+     end
+
+     def existing_columns
+       begin
+         result = get_table
+         response = result[:responses][:get_table]
+         response.schema.fields.map {|column| column.to_h }
+       rescue NotFoundError
+         return []
+       end
+     end
+
+     def get_dataset(dataset: nil)
+       dataset ||= self.dataset
+       begin
+         logger.info { "Get dataset... #{project}:#{dataset}" }
+         response = client.get_dataset(project, dataset)
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404
+           raise NotFoundError, "Dataset #{project}:#{dataset} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         raise Error, "Failed to get_dataset(#{project}, #{dataset}), response:#{response}"
+       end
+
+       { responses: { get_dataset: response } }
+     end
+
+     def insert_dataset(dataset: nil, reference: nil)
+       dataset ||= self.dataset
+       begin
+         logger.info { "#{head}Insert (create) dataset... #{project}:#{dataset}" }
+         hint = {}
+         if reference
+           response = get_dataset(dataset: reference)[:responses][:get_dataset]
+           hint = { access: response.access }
+         end
+         body = {
+           dataset_reference: {
+             project_id: project,
+             dataset_id: dataset,
+           },
+         }.merge(hint)
+         opts = {}
+         logger.debug { "#{head}insert_dataset(#{project}, #{body}, #{opts})" }
+         unless dry_run?
+           response = client.insert_dataset(project, body, opts)
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 409 && /Already Exists:/ =~ e.message
+           # ignore 'Already Exists' error
+           return {}
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         raise Error, "Failed to insert_dataset(#{project}, #{body}, #{opts}), response:#{response}"
+       end
+
+       { responses: { insert_dataset: response } }
+     end
+     alias :create_dataset :insert_dataset
+
+     def get_table(dataset: nil, table: nil)
+       dataset ||= self.dataset
+       table ||= self.table
+       begin
+         logger.debug { "Get table... #{project}:#{dataset}.#{table}" }
+         response = client.get_table(project, dataset, table)
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 # not found
+           raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         raise Error, "Failed to get_table(#{project}, #{dataset}, #{table}), response:#{response}"
+       end
+
+       { responses: { get_table: response } }
+     end
+
+     def insert_table(dataset: nil, table: nil, columns: )
+       dataset ||= self.dataset
+       table ||= self.table
+       schema = Schema.new(columns)
+
+       begin
+         logger.info { "#{head}Insert (create) table... #{project}:#{dataset}.#{table}" }
+         body = {
+           table_reference: {
+             table_id: table,
+           },
+           schema: {
+             fields: schema,
+           }
+         }
+         opts = {}
+         logger.debug { "#{head}insert_table(#{project}, #{dataset}, #{body}, #{opts})" }
+         unless dry_run?
+           response = client.insert_table(project, dataset, body, opts)
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 409 && /Already Exists:/ =~ e.message
+           # ignore 'Already Exists' error
+           return {}
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         raise Error, "Failed to insert_table(#{project}, #{dataset}, #{body}, #{opts}), response:#{response}"
+       end
+
+       { responses: { insert_table: response } }
+     end
+     alias :create_table :insert_table
+
+     def delete_table(dataset: nil, table: nil)
+       dataset ||= self.dataset
+       table ||= self.table
+
+       begin
+         logger.info { "#{head}Delete (drop) table... #{project}:#{dataset}.#{table}" }
+         unless dry_run?
+           client.delete_table(project, dataset, table) # no response
+           success = true
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 && /Not found:/ =~ e.message
+           # ignore 'Not Found' error
+           return {}
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         raise Error, "Failed to delete_table(#{project}, #{dataset}, #{table}), response:#{response}"
+       end
+
+       { success: success }
+     end
+     alias :drop_table :delete_table
+
+     def list_tables(dataset: nil, max_results: 999999)
+       dataset ||= self.dataset
+
+       tables = []
+       begin
+         logger.info { "List tables... #{project}:#{dataset}" }
+         response = client.list_tables(project, dataset, max_results: max_results)
+         while true
+           _tables = (response.tables || []).map { |t| t.table_reference.table_id.to_s }
+           tables.concat(_tables)
+           if next_page_token = response.next_page_token
+             response = client.list_tables(project, dataset, page_token: next_page_token, max_results: max_results)
+           else
+             break
+           end
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 && /Not found:/ =~ e.message
+           raise NotFoundError, "Dataset #{project}:#{dataset} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         logger.error { "list_tables(#{project}, #{dataset}), response:#{response}" }
+         raise Error, "failed to list tables #{project}:#{dataset}, response:#{response}"
+       end
+
+       { tables: tables }
+     end
+
+     def purge_tables(dataset: nil, table_prefix: , suffix_format: , purge_before: , timezone: nil)
+       dataset ||= self.dataset
+       timezone ||= Time.now.strftime('%z')
+
+       before_tables = list_tables[:tables]
+
+       purge_before_t = TimeWithZone.strptime_with_zone(purge_before, suffix_format, timezone)
+       tables = before_tables.select do |tbl|
+         suffix = tbl.gsub(table_prefix, '')
+         begin
+           suffix_t = TimeWithZone.strptime_with_zone(suffix, suffix_format, timezone)
+         rescue
+           next
+         end
+         # skip if different from the suffix_format
+         next if suffix_t.strftime(suffix_format) != suffix
+         suffix_t <= purge_before_t
+       end
+
+       tables.each do |_table|
+         delete_table(table: _table)
+         # If you make more than 100 requests per second, throttling might occur.
+         # See https://cloud.google.com/bigquery/quota-policy#apirequests
+         sleep 1
+       end
+
+       { delete_tables: tables }
+     end
+
+     # rows:
+     #   - id: 1
+     #     type: one
+     #     record:
+     #       child1: 'child1'
+     #       child2: 'child2'
+     #   - id: 2
+     #     type: two
+     #     record:
+     #       child1: 'child3'
+     #       child2: 'child4'
+     def insert_all_table_data(dataset: nil, table: nil, rows: )
+       dataset ||= self.dataset
+       table ||= self.table
+
+       begin
+         logger.info { "#{head}insertAll tableData... #{project}:#{dataset}.#{table}" }
+         body = {
+           rows: rows.map {|row| { json: row } },
+         }
+         opts = {}
+         unless dry_run?
+           response = client.insert_all_table_data(project, dataset, table, body, opts)
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 # not found
+           raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         logger.error {
+           "insert_all_table_data(#{project}, #{dataset}, #{table}, #{opts}), response:#{response}"
+         }
+         raise Error, "failed to insert_all table_data #{project}:#{dataset}.#{table}, response:#{response}"
+       end
+
+       { responses: { insert_all_table_data: response } }
+     end
+
+     # @return Hash result of list table_data
+     #
+     # Example:
+     # {
+     #   columns:
+     #     [
+     #       {
+     #         name: id,
+     #         type: INTEGER
+     #       },
+     #       {
+     #         name: type,
+     #         type: STRING
+     #       },
+     #       {
+     #         name: record.child1,
+     #         type: STRING
+     #       },
+     #       {
+     #         name: record.child2,
+     #         type: STRING
+     #       },
+     #   values:
+     #     [
+     #       [2,"two","child3","child4"],
+     #       [1,"one","child1","child2"]
+     #     ],
+     #   total_rows: 2
+     # }
+     def list_table_data(dataset: nil, table: nil, max_results: 100)
+       dataset ||= self.dataset
+       table ||= self.table
+
+       begin
+         logger.info { "list_table_data(#{project}, #{dataset}, #{table}, max_results: #{max_results})" }
+         response = client.list_table_data(project, dataset, table, max_results: max_results)
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 # not found
+           raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         logger.error { "list_table_data(#{project}, #{dataset}, #{table}, max_results: #{max_results})" }
+         raise Error, "Failed to list table_data #{project}:#{dataset}.#{table}, response:#{response}"
+       end
+
+       flattened_columns = Schema.new(existing_columns).flattened_columns.map do |name, column|
+         {name: name}.merge!(column)
+       end
+       if rows = response.to_h[:rows]
+         flattened_values = flatten_values(rows)
+       end
+
+       {
+         total_rows: response.total_rows,
+         columns: flattened_columns,
+         values: flattened_values,
+         response: {
+           list_table_data: response,
+         }
+       }
+     end
+
+     private def flatten_values(rows)
+       rows.map do |r|
+         if r.key?(:f)
+           r[:f].map do |f|
+             if f[:v].respond_to?(:key?) && f[:v].key?(:f)
+               flatten_values(f[:v][:f])
+             else
+               f[:v]
+             end
+           end.flatten
+         else
+           r[:v]
+         end
+       end
+     end
+
+     def patch_table(dataset: nil, table: nil, columns: nil, add_columns: nil)
+       dataset ||= self.dataset
+       table ||= self.table
+
+       if columns.nil? and add_columns.nil?
+         raise ArgumentError, 'patch_table: `columns` or `add_columns` is required'
+       end
+
+       before_columns = existing_columns
+       if columns # if already given
+         schema = Schema.new(columns)
+       else
+         schema = Schema.new(add_columns)
+         schema.reverse_merge!(before_columns)
+       end
+       schema.validate_permitted_operations!(before_columns)
+
+       begin
+         logger.info { "#{head}Patch table... #{project}:#{dataset}.#{table}" }
+         fields = schema.map {|column| HashUtil.deep_symbolize_keys(column) }
+         body = {
+           schema: {
+             fields: fields,
+           }
+         }
+         opts = {}
+         logger.debug { "#{head}patch_table(#{project}, #{dataset}, #{table}, #{body}, options: #{opts})" }
+         unless dry_run?
+           response = client.patch_table(project, dataset, table, body, options: opts)
+         end
+       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+         if e.status_code == 404 # not found
+           raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
+         end
+
+         response = {status_code: e.status_code, message: e.message, error_class: e.class}
+         logger.error {
+           "patch_table(#{project}, #{dataset}, #{table}, #{body}, options: #{opts}), response:#{response}"
+         }
+         raise Error, "Failed to patch table #{project}:#{dataset}.#{table}, response:#{response}"
+       end
+
+       after_columns = existing_columns
+
+       {
+         before_columns: before_columns,
+         after_columns: after_columns,
+         responses: { patch_table: response },
+       }
+     end
+     alias :add_column :patch_table
+
+     def copy_table(destination_table:, destination_dataset: nil, source_table: nil, source_dataset: nil, write_disposition: 'WRITE_TRUNCATE')
+       source_table ||= self.table
+       source_dataset ||= self.dataset
+       destination_dataset ||= source_dataset
+
+       body = {
+         configuration: {
+           copy: {
+             create_disposition: 'CREATE_IF_NEEDED',
+             write_disposition: write_disposition,
+             source_table: {
+               project_id: project,
+               dataset_id: source_dataset,
+               table_id: source_table,
+             },
+             destination_table: {
+               project_id: project,
+               dataset_id: destination_dataset,
+               table_id: destination_table,
+             },
+           }
+         }
+       }
+       opts = {}
+
+       logger.info { "#{head}insert_job(#{project}, #{body}, #{opts})" }
+       unless dry_run?
+         response = client.insert_job(project, body, opts)
+         get_response = wait_load('copy', response)
+       end
+
+       {
+         responses: {
+           insert_job: response,
+           last_get_job: get_response,
+         }
+       }
+     end
+
+     def insert_select(query:, destination_table: nil, destination_dataset: nil, write_disposition: 'WRITE_TRUNCATE')
+       destination_table ||= self.table
+       destination_dataset ||= self.dataset
+
+       body = {
+         configuration: {
+           query: {
+             allow_large_results: true,
+             flatten_results: false,
+             write_disposition: write_disposition,
+             query: query,
+             destination_table: {
+               project_id: self.project,
+               dataset_id: destination_dataset,
+               table_id: destination_table,
+             },
+           }
+         }
+       }
+       opts = {}
+
+       logger.info { "#{head}insert_job(#{project}, #{body}, #{opts})" }
+       unless dry_run?
+         response = client.insert_job(project, body, opts)
+         get_response = wait_load('query', response)
+       end
+
+       {
+         responses: {
+           insert_job: response,
+           last_get_job: get_response,
+         }
+       }
+     end
+
+     private def wait_load(kind, response)
+       started = Time.now
+
+       wait_interval = self.job_status_polling_interval
+       max_polling_time = self.job_status_max_polling_time
+       _response = response
+
+       while true
+         job_id = _response.job_reference.job_id
+         elapsed = Time.now - started
+         status = _response.status.state
+         if status == "DONE"
+           logger.info {
+             "#{kind} job completed... " \
+             "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
+           }
+           break
+         elsif elapsed.to_i > max_polling_time
+           message = "Checking #{kind} job status... " \
+             "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[TIMEOUT]"
+           logger.info { message }
+           raise JobTimeoutError.new(message)
+         else
+           logger.info {
+             "Checking #{kind} job status... " \
+             "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
+           }
+           sleep wait_interval
+           _response = client.get_job(project, job_id)
+         end
+       end
+
+       # cf. http://www.rubydoc.info/github/google/google-api-ruby-client/Google/Apis/BigqueryV2/JobStatus#errors-instance_method
+       # `errors` returns Array<Google::Apis::BigqueryV2::ErrorProto> if any error exists.
+       # Otherwise, this returns nil.
+       if _errors = _response.status.errors
+         raise Error, "Failed while waiting for a job, get_job(#{project}, #{job_id}), errors:#{_errors.map(&:to_h)}"
+       end
+
+       _response
+     end
+
+     def drop_column(table: nil, columns: nil, drop_columns: nil, backup_dataset: nil, backup_table: nil)
+       table ||= self.table
+       backup_dataset ||= self.dataset
+       if columns.nil? and drop_columns.nil?
+         raise ArgumentError, '`drop_columns` or `columns` is required'
+       end
+
+       result = { responses: {} }
+
+       before_columns = existing_columns
+
+       if columns # if already given
+         schema = Schema.new(columns)
+       else
+         schema = Schema.new(existing_columns)
+         schema.reject_columns!(drop_columns)
+       end
+       if schema.empty? && !dry_run?
+         raise Error, 'No columns would remain after dropping'
+       end
+
+       schema.validate_permitted_operations!(before_columns)
+
+       unless backup_dataset == self.dataset
+         create_dataset(dataset: backup_dataset)
+       end
+
+       if backup_table
+         _result = copy_table(source_table: table, destination_table: backup_table, destination_dataset: backup_dataset)
+         result[:responses].merge!(_result[:responses])
+       end
+
+       unless (add_columns = schema.diff_columns_by_name(before_columns)).empty?
+         _result = patch_table(add_columns: add_columns)
+         result[:responses].merge!(_result[:responses])
+       end
+
+       query_fields = schema.build_query_fields(before_columns)
+       query = "SELECT #{query_fields.join(',')} FROM [#{dataset}.#{table}]"
+       _result = insert_select(query: query, destination_table: table)
+       result[:responses].merge!(_result[:responses])
+
+       after_columns = existing_columns
+
+       result.merge!({before_columns: before_columns, after_columns: after_columns})
+     end
+
+     def migrate_table(table: nil, schema_file: nil, columns: nil, backup_dataset: nil, backup_table: nil)
+       table ||= self.table
+       backup_dataset ||= self.dataset
+
+       if schema_file.nil? and columns.nil?
+         raise ArgumentError, '`schema_file` or `columns` is required'
+       end
+       if schema_file
+         columns = HashUtil.deep_symbolize_keys(JSON.parse(File.read(schema_file)))
+       end
+       Schema.validate_columns!(columns)
+
+       before_columns = existing_columns
+
+       result = {}
+       if before_columns.empty?
+         result = create_table(table: table, columns: columns)
+       else
+         add_columns = Schema.diff_columns(before_columns, columns)
+         drop_columns = Schema.diff_columns(columns, before_columns)
+
+         if !drop_columns.empty?
+           drop_column(table: table, columns: columns,
+                       backup_dataset: backup_dataset, backup_table: backup_table)
+         elsif !add_columns.empty?
+           add_column(table: table, columns: columns)
+         end
+       end
+
+       after_columns = existing_columns
+
+       if after_columns.empty? and !dry_run?
+         raise Error, "after_columns is empty. " \
+           "before_columns: #{before_columns}, after_columns: #{after_columns}, columns: #{columns}"
+       end
+
+       result.merge!( before_columns: before_columns, after_columns: after_columns )
+     end
+   end
+ end
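
For completeness, a minimal sketch of using BigqueryWrapper on its own, assuming a service-account JSON key and the gem's `lib/bigquery_migration/` layout; the key path, dataset, table, and columns below are placeholders, not values from the gem:

    require 'bigquery_migration/bigquery_wrapper'

    config = {
      'json_keyfile' => '/path/to/service_account.json',  # placeholder; project is read from the key
      'dataset'      => 'my_dataset',
      'table'        => 'my_table',
    }
    bq = BigqueryMigration::BigqueryWrapper.new(config, dry_run: true)

    # With dry_run: true the write methods only log what they would do.
    bq.create_table(columns: [{ name: 'id', type: 'INTEGER' }, { name: 'name', type: 'STRING' }])
    bq.existing_columns  # => [] until the table actually exists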