bigquery_migration 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +29 -2
- data/bigquery_migration.gemspec +1 -0
- data/example/application_default.yml +23 -0
- data/example/copy_table.yml +1 -1
- data/example/example.yml +1 -1
- data/example/insert_select.yml +1 -1
- data/example/migrate_partitioned_table.yml +1 -1
- data/example/migrate_table.yml +1 -1
- data/example/table_info.yml +1 -1
- data/exe/bq-migrate +1 -1
- data/lib/bigquery_migration/bigquery_wrapper.rb +151 -70
- data/lib/bigquery_migration/version.rb +1 -1
- metadata +18 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97e77d9eca83b83c064a4e5328dbd3d776633290
|
4
|
+
data.tar.gz: 8de9b03871570d5558e29a16d84b0fbef6873fc7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7de67b97655404b7f4a9f209484932252ed9d64da3e5535830012ce22cf6fcb3ed1283f13e8859017072bcaff43627d069f9b03851d97b00157c262b82351468
|
7
|
+
data.tar.gz: f82e01cb2b9d25c24344dd839d56807039c7c24fdf4ea055b308a02b84feecb49997ce73c7faf698b6c6345a9c3435350be8fb416d2a53894918fad6836c502e
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -95,13 +95,40 @@ This tool has an advantage that it is **faster** than reloading data entirely.
|
|
95
95
|
|
96
96
|
### Run example:
|
97
97
|
|
98
|
-
|
98
|
+
**Service Account**
|
99
|
+
|
100
|
+
Prepare your service account json at `example/your-project-000.json`, then
|
99
101
|
|
100
102
|
```
|
101
103
|
$ bundle exec bq_migrate run example/example.yml # dry-run
|
102
104
|
$ bundle exec bq_migrate run example/example.yml --exec
|
103
105
|
```
|
104
106
|
|
107
|
+
**OAuth**
|
108
|
+
|
109
|
+
Install gcloud into your development environment:
|
110
|
+
|
111
|
+
```
|
112
|
+
curl https://sdk.cloud.google.com | bash
|
113
|
+
gcloud init
|
114
|
+
gcloud auth login
|
115
|
+
gcloud auth application-default login
|
116
|
+
gcloud config set project <GCP_PROJECT_NAME>
|
117
|
+
```
|
118
|
+
|
119
|
+
Make sure `gcloud` works
|
120
|
+
|
121
|
+
```
|
122
|
+
gcloud compute instances list
|
123
|
+
```
|
124
|
+
|
125
|
+
Run as:
|
126
|
+
|
127
|
+
```
|
128
|
+
$ bundle exec bq_migrate run example/application_default.yml # dry-run
|
129
|
+
$ bundle exec bq_migrate run example/application_default.yml --exec
|
130
|
+
```
|
131
|
+
|
105
132
|
### Run test:
|
106
133
|
|
107
134
|
```
|
@@ -110,7 +137,7 @@ $ bundle exec rake test
|
|
110
137
|
|
111
138
|
To run tests which directly connects to BigQuery, prepare `example/your-project-000.json`, then
|
112
139
|
|
113
|
-
|
140
|
+
```
|
114
141
|
$ bundle exec rake test
|
115
142
|
```
|
116
143
|
|
data/bigquery_migration.gemspec
CHANGED
@@ -22,6 +22,7 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_dependency "google-api-client"
|
23
23
|
spec.add_dependency "tzinfo"
|
24
24
|
spec.add_dependency "thor"
|
25
|
+
spec.add_dependency "inifile"
|
25
26
|
|
26
27
|
spec.add_development_dependency "bundler", "~> 1.11"
|
27
28
|
spec.add_development_dependency "rake", "~> 10.0"
|
@@ -0,0 +1,23 @@
|
|
1
|
+
bigquery: &bigquery
|
2
|
+
# project: read from ~/.config/gcloud/configurations/config_default
|
3
|
+
dataset: your_dataset_name
|
4
|
+
table: your_table_name
|
5
|
+
|
6
|
+
actions:
|
7
|
+
- action: create_dataset
|
8
|
+
<<: *bigquery
|
9
|
+
- action: migrate_table
|
10
|
+
<<: *bigquery
|
11
|
+
columns:
|
12
|
+
- { name: 'timestamp', type: 'TIMESTAMP' }
|
13
|
+
- name: 'record'
|
14
|
+
type: 'RECORD'
|
15
|
+
fields:
|
16
|
+
- { name: 'string', type: 'STRING' }
|
17
|
+
- { name: 'integer', type: 'INTEGER' }
|
18
|
+
- { name: 'bytes', type: 'BYTES' }
|
19
|
+
- action: migrate_table
|
20
|
+
<<: *bigquery
|
21
|
+
schema_file: example/schema.json
|
22
|
+
- action: delete_table
|
23
|
+
<<: *bigquery
|
data/example/copy_table.yml
CHANGED
data/example/example.yml
CHANGED
@@ -1 +1 @@
|
|
1
|
-
migrate_table.yml
|
1
|
+
example/migrate_table.yml
|
data/example/insert_select.yml
CHANGED
data/example/migrate_table.yml
CHANGED
data/example/table_info.yml
CHANGED
data/exe/bq-migrate
CHANGED
@@ -1 +1 @@
|
|
1
|
-
bq_migrate
|
1
|
+
exe/bq_migrate
|
@@ -8,6 +8,7 @@ require_relative 'hash_util'
|
|
8
8
|
require 'google/apis/bigquery_v2'
|
9
9
|
require 'google/api_client/auth/key_utils'
|
10
10
|
require 'securerandom'
|
11
|
+
require 'inifile'
|
11
12
|
|
12
13
|
class BigqueryMigration
|
13
14
|
class BigqueryWrapper
|
@@ -20,92 +21,50 @@ class BigqueryMigration
|
|
20
21
|
def initialize(config, opts = {})
|
21
22
|
@config = HashUtil.deep_symbolize_keys(config)
|
22
23
|
@opts = HashUtil.deep_symbolize_keys(opts)
|
23
|
-
configure
|
24
|
-
end
|
25
|
-
|
26
|
-
def configure
|
27
|
-
if json_keyfile = config[:json_keyfile]
|
28
|
-
json_key =
|
29
|
-
case json_keyfile
|
30
|
-
when String
|
31
|
-
File.read(json_keyfile)
|
32
|
-
when Hash
|
33
|
-
json_keyfile[:content]
|
34
|
-
else
|
35
|
-
raise ConfigError.new "Unsupported json_keyfile type"
|
36
|
-
end
|
37
|
-
json_keyparams =
|
38
|
-
begin
|
39
|
-
case json_key
|
40
|
-
when String
|
41
|
-
HashUtil.deep_symbolize_keys(JSON.parse(json_key))
|
42
|
-
when Hash
|
43
|
-
HashUtil.deep_symbolize_keys(json_key)
|
44
|
-
end
|
45
|
-
rescue => e
|
46
|
-
raise ConfigError.new "json_keyfile is not a JSON file"
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
if json_keyparams
|
51
|
-
config[:project] ||= json_keyparams[:project_id]
|
52
|
-
config[:json_key] = json_keyparams.to_json
|
53
|
-
end
|
54
|
-
|
55
|
-
config[:retries] ||= 5
|
56
|
-
end
|
57
|
-
|
58
|
-
def project
|
59
|
-
@project ||= config[:project] || raise(ConfigError, '`project` is required.')
|
60
|
-
end
|
61
|
-
|
62
|
-
def dataset
|
63
|
-
@dataset ||= config[:dataset] || raise(ConfigError, '`dataset` is required.')
|
64
|
-
end
|
65
|
-
|
66
|
-
def table
|
67
|
-
@table ||= config[:table] || raise(ConfigError, '`table` is required.')
|
68
|
-
end
|
69
|
-
|
70
|
-
def job_status_polling_interval
|
71
|
-
@job_status_polling_interval ||= config[:job_status_polling_interval] || 5
|
72
|
-
end
|
73
|
-
|
74
|
-
def job_status_max_polling_time
|
75
|
-
@job_status_max_polling_time ||= config[:job_status_polling_time] || 3600
|
76
|
-
end
|
77
|
-
|
78
|
-
def dry_run?
|
79
|
-
@opts[:dry_run]
|
80
|
-
end
|
81
|
-
|
82
|
-
def head
|
83
|
-
dry_run? ? '(DRY-RUN) ' : '(EXECUTE) '
|
84
24
|
end
|
85
25
|
|
86
26
|
def client
|
87
27
|
return @cached_client if @cached_client && @cached_client_expiration > Time.now
|
88
28
|
|
89
29
|
client = Google::Apis::BigqueryV2::BigqueryService.new
|
90
|
-
client.request_options.retries = config[:retries]
|
30
|
+
client.request_options.retries = retries
|
31
|
+
client.client_options.open_timeout_sec = open_timeout_sec
|
91
32
|
if client.request_options.respond_to?(:timeout_sec)
|
92
|
-
client.
|
93
|
-
client.request_options.timeout_sec = config[:timeout_sec] || 300
|
33
|
+
client.client_options.timeout_sec = timeout_sec
|
94
34
|
else # google-api-ruby-client >= v0.11.0
|
95
|
-
if config[:timeout_sec]
|
35
|
+
if timeout_sec
|
96
36
|
logger.warn { "timeout_sec is deprecated in google-api-ruby-client >= v0.11.0. Use read_timeout_sec instead" }
|
97
37
|
end
|
98
|
-
client.client_options.
|
99
|
-
client.client_options.
|
100
|
-
client.client_options.read_timeout_sec = config[:read_timeout_sec] || config[:timeout_sec] || 300 # default: 60
|
38
|
+
client.client_options.send_timeout_sec = send_timeout_sec
|
39
|
+
client.client_options.read_timeout_sec = read_timeout_sec
|
101
40
|
end
|
102
41
|
logger.debug { "client_options: #{client.client_options.to_h}" }
|
103
42
|
logger.debug { "request_options: #{client.request_options.to_h}" }
|
104
43
|
|
105
44
|
scope = "https://www.googleapis.com/auth/bigquery"
|
106
45
|
|
107
|
-
|
108
|
-
|
46
|
+
case auth_method
|
47
|
+
when 'authorized_user'
|
48
|
+
auth = Signet::OAuth2::Client.new(
|
49
|
+
token_credential_uri: "https://accounts.google.com/o/oauth2/token",
|
50
|
+
audience: "https://accounts.google.com/o/oauth2/token",
|
51
|
+
scope: scope,
|
52
|
+
client_id: credentials['client_id'],
|
53
|
+
client_secret: credentials['client_secret'],
|
54
|
+
refresh_token: credentials['refresh_token']
|
55
|
+
)
|
56
|
+
auth.refresh!
|
57
|
+
when 'compute_engine'
|
58
|
+
auth = Google::Auth::GCECredentials.new
|
59
|
+
when 'service_account'
|
60
|
+
key = StringIO.new(credentials.to_json)
|
61
|
+
auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
|
62
|
+
when 'application_default'
|
63
|
+
auth = Google::Auth.get_application_default([scope])
|
64
|
+
else
|
65
|
+
raise ConfigError, "Unknown auth method: #{auth_method}"
|
66
|
+
end
|
67
|
+
|
109
68
|
client.authorization = auth
|
110
69
|
|
111
70
|
@cached_client_expiration = Time.now + 1800
|
@@ -747,5 +706,127 @@ class BigqueryMigration
|
|
747
706
|
|
748
707
|
result.merge!( before_columns: before_columns, after_columns: after_columns )
|
749
708
|
end
|
709
|
+
|
710
|
+
# For old version compatibility
|
711
|
+
# Use credentials_file or credentials instead
|
712
|
+
def json_key
|
713
|
+
if json_keyfile = config[:json_keyfile]
|
714
|
+
begin
|
715
|
+
case json_keyfile
|
716
|
+
when String
|
717
|
+
return JSON.parse(File.read(json_keyfile))
|
718
|
+
when Hash
|
719
|
+
return json_keyfile[:content]
|
720
|
+
else
|
721
|
+
raise ConfigError.new "Unsupported json_keyfile type"
|
722
|
+
end
|
723
|
+
rescue => e
|
724
|
+
raise ConfigError.new "json_keyfile is not a JSON file"
|
725
|
+
end
|
726
|
+
end
|
727
|
+
nil
|
728
|
+
end
|
729
|
+
|
730
|
+
# compute_engine, authorized_user, service_account
|
731
|
+
def auth_method
|
732
|
+
@auth_method ||= ENV['AUTH_METHOD'] || config.fetch(:auth_method, nil) || credentials['type'] || 'compute_engine'
|
733
|
+
end
|
734
|
+
|
735
|
+
def credentials
|
736
|
+
json_key || JSON.parse(config.fetch(:credentials, nil) || File.read(credentials_file))
|
737
|
+
end
|
738
|
+
|
739
|
+
def credentials_file
|
740
|
+
@credentials_file ||= File.expand_path(
|
741
|
+
# ref. https://developers.google.com/identity/protocols/application-default-credentials
|
742
|
+
ENV['GOOGLE_APPLICATION_CREDENTIALS'] ||
|
743
|
+
config.fetch(:credentials_file, nil) ||
|
744
|
+
(File.exist?(global_application_default_credentials_file) ? global_application_default_credentials_file : application_default_credentials_file)
|
745
|
+
)
|
746
|
+
end
|
747
|
+
|
748
|
+
def application_default_credentials_file
|
749
|
+
@application_default_credentials_file ||= File.expand_path("~/.config/gcloud/application_default_credentials.json")
|
750
|
+
end
|
751
|
+
|
752
|
+
def global_application_default_credentials_file
|
753
|
+
@global_application_default_credentials_file ||= '/etc/google/auth/application_default_credentials.json'
|
754
|
+
end
|
755
|
+
|
756
|
+
def config_default_file
|
757
|
+
File.expand_path('~/.config/gcloud/configurations/config_default')
|
758
|
+
end
|
759
|
+
|
760
|
+
def config_default
|
761
|
+
# {core:{account:'xxx',project:'xxx'},compute:{zone:'xxx}}
|
762
|
+
@config_default ||= File.readable?(config_default_file) ? HashUtil.deep_symbolize_keys(IniFile.load(config_default_file).to_h) : {}
|
763
|
+
end
|
764
|
+
|
765
|
+
def service_account_default
|
766
|
+
(config_default[:core] || {})[:account]
|
767
|
+
end
|
768
|
+
|
769
|
+
def project_default
|
770
|
+
(config_default[:core] || {})[:project]
|
771
|
+
end
|
772
|
+
|
773
|
+
def zone_default
|
774
|
+
(config_default[:compute] || {})[:zone]
|
775
|
+
end
|
776
|
+
|
777
|
+
def service_account
|
778
|
+
@service_account ||= ENV['GOOGLE_SERVICE_ACCOUNT'] || config.fetch(:service_account, nil) || credentials['client_email'] || service_account_default
|
779
|
+
end
|
780
|
+
|
781
|
+
def retries
|
782
|
+
@retries ||= ENV['RETRIES'] || config.fetch(:retries, nil) || 5
|
783
|
+
end
|
784
|
+
|
785
|
+
# For google-api-client < 0.11.0. Deprecated
|
786
|
+
def timeout_sec
|
787
|
+
@timeout_sec ||= ENV['TIMEOUT_SEC'] || config.fetch(:timeout_sec, nil)
|
788
|
+
end
|
789
|
+
|
790
|
+
def send_timeout_sec
|
791
|
+
@send_timeout_sec ||= ENV['SEND_TIMEOUT_SEC'] || config.fetch(:send_timeout_sec, nil) || 60
|
792
|
+
end
|
793
|
+
|
794
|
+
def read_timeout_sec
|
795
|
+
@read_timeout_sec ||= ENV['READ_TIMEOUT_SEC'] || config.fetch(:read_timeout_sec, nil) || timeout_sec || 300
|
796
|
+
end
|
797
|
+
|
798
|
+
def open_timeout_sec
|
799
|
+
@open_timeout_sec ||= ENV['OPEN_TIMEOUT_SEC'] || config.fetch(:open_timeout_sec, nil) || 300
|
800
|
+
end
|
801
|
+
|
802
|
+
def project
|
803
|
+
@project ||= ENV['GOOGLE_PROJECT'] || config.fetch(:project, nil) || credentials['project_id']
|
804
|
+
@project ||= credentials['client_email'].chomp('.iam.gserviceaccount.com').split('@').last if credentials['client_email']
|
805
|
+
@project ||= project_default || raise(ConfigError, '`project` is required.')
|
806
|
+
end
|
807
|
+
|
808
|
+
def dataset
|
809
|
+
@dataset ||= config[:dataset] || raise(ConfigError, '`dataset` is required.')
|
810
|
+
end
|
811
|
+
|
812
|
+
def table
|
813
|
+
@table ||= config[:table] || raise(ConfigError, '`table` is required.')
|
814
|
+
end
|
815
|
+
|
816
|
+
def job_status_polling_interval
|
817
|
+
@job_status_polling_interval ||= config[:job_status_polling_interval] || 5
|
818
|
+
end
|
819
|
+
|
820
|
+
def job_status_max_polling_time
|
821
|
+
@job_status_max_polling_time ||= config[:job_status_polling_time] || 3600
|
822
|
+
end
|
823
|
+
|
824
|
+
def dry_run?
|
825
|
+
@opts[:dry_run]
|
826
|
+
end
|
827
|
+
|
828
|
+
def head
|
829
|
+
dry_run? ? '(DRY-RUN) ' : '(EXECUTE) '
|
830
|
+
end
|
750
831
|
end
|
751
832
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bigquery_migration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-04-
|
11
|
+
date: 2017-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: google-api-client
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: inifile
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: bundler
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -157,6 +171,7 @@ files:
|
|
157
171
|
- bigquery_migration.gemspec
|
158
172
|
- bin/console
|
159
173
|
- bin/setup
|
174
|
+
- example/application_default.yml
|
160
175
|
- example/copy_table.yml
|
161
176
|
- example/example.yml
|
162
177
|
- example/insert_select.yml
|
@@ -199,7 +214,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
199
214
|
version: '0'
|
200
215
|
requirements: []
|
201
216
|
rubyforge_project:
|
202
|
-
rubygems_version: 2.
|
217
|
+
rubygems_version: 2.6.11
|
203
218
|
signing_key:
|
204
219
|
specification_version: 4
|
205
220
|
summary: Migrate BigQuery table schema
|