bigquery_migration 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +29 -2
- data/bigquery_migration.gemspec +1 -0
- data/example/application_default.yml +23 -0
- data/example/copy_table.yml +1 -1
- data/example/example.yml +1 -1
- data/example/insert_select.yml +1 -1
- data/example/migrate_partitioned_table.yml +1 -1
- data/example/migrate_table.yml +1 -1
- data/example/table_info.yml +1 -1
- data/exe/bq-migrate +1 -1
- data/lib/bigquery_migration/bigquery_wrapper.rb +151 -70
- data/lib/bigquery_migration/version.rb +1 -1
- metadata +18 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97e77d9eca83b83c064a4e5328dbd3d776633290
|
4
|
+
data.tar.gz: 8de9b03871570d5558e29a16d84b0fbef6873fc7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7de67b97655404b7f4a9f209484932252ed9d64da3e5535830012ce22cf6fcb3ed1283f13e8859017072bcaff43627d069f9b03851d97b00157c262b82351468
|
7
|
+
data.tar.gz: f82e01cb2b9d25c24344dd839d56807039c7c24fdf4ea055b308a02b84feecb49997ce73c7faf698b6c6345a9c3435350be8fb416d2a53894918fad6836c502e
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -95,13 +95,40 @@ This tool has an advantage that it is **faster** than reloading data entirely.
|
|
95
95
|
|
96
96
|
### Run example:
|
97
97
|
|
98
|
-
|
98
|
+
**Service Account**
|
99
|
+
|
100
|
+
Prepare your service account json at `example/your-project-000.json`, then
|
99
101
|
|
100
102
|
```
|
101
103
|
$ bundle exec bq_migrate run example/example.yml # dry-run
|
102
104
|
$ bundle exec bq_migrate run example/example.yml --exec
|
103
105
|
```
|
104
106
|
|
107
|
+
**OAuth**
|
108
|
+
|
109
|
+
Install gcloud into your development environment:
|
110
|
+
|
111
|
+
```
|
112
|
+
curl https://sdk.cloud.google.com | bash
|
113
|
+
gcloud init
|
114
|
+
gcloud auth login
|
115
|
+
gcloud auth application-default login
|
116
|
+
gcloud config set project <GCP_PROJECT_NAME>
|
117
|
+
```
|
118
|
+
|
119
|
+
Make sure `gcloud` works
|
120
|
+
|
121
|
+
```
|
122
|
+
gcloud compute instances list
|
123
|
+
```
|
124
|
+
|
125
|
+
Run as:
|
126
|
+
|
127
|
+
```
|
128
|
+
$ bundle exec bq_migrate run example/application_default.yml # dry-run
|
129
|
+
$ bundle exec bq_migrate run example/application_default.yml --exec
|
130
|
+
```
|
131
|
+
|
105
132
|
### Run test:
|
106
133
|
|
107
134
|
```
|
@@ -110,7 +137,7 @@ $ bundle exec rake test
|
|
110
137
|
|
111
138
|
To run tests that connect directly to BigQuery, prepare `example/your-project-000.json`, then
|
112
139
|
|
113
|
-
|
140
|
+
```
|
114
141
|
$ bundle exec rake test
|
115
142
|
```
|
116
143
|
|
data/bigquery_migration.gemspec
CHANGED
@@ -22,6 +22,7 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_dependency "google-api-client"
|
23
23
|
spec.add_dependency "tzinfo"
|
24
24
|
spec.add_dependency "thor"
|
25
|
+
spec.add_dependency "inifile"
|
25
26
|
|
26
27
|
spec.add_development_dependency "bundler", "~> 1.11"
|
27
28
|
spec.add_development_dependency "rake", "~> 10.0"
|
@@ -0,0 +1,23 @@
|
|
1
|
+
bigquery: &bigquery
|
2
|
+
# project: read from ~/.config/gcloud/configurations/config_default
|
3
|
+
dataset: your_dataset_name
|
4
|
+
table: your_table_name
|
5
|
+
|
6
|
+
actions:
|
7
|
+
- action: create_dataset
|
8
|
+
<<: *bigquery
|
9
|
+
- action: migrate_table
|
10
|
+
<<: *bigquery
|
11
|
+
columns:
|
12
|
+
- { name: 'timestamp', type: 'TIMESTAMP' }
|
13
|
+
- name: 'record'
|
14
|
+
type: 'RECORD'
|
15
|
+
fields:
|
16
|
+
- { name: 'string', type: 'STRING' }
|
17
|
+
- { name: 'integer', type: 'INTEGER' }
|
18
|
+
- { name: 'bytes', type: 'BYTES' }
|
19
|
+
- action: migrate_table
|
20
|
+
<<: *bigquery
|
21
|
+
schema_file: example/schema.json
|
22
|
+
- action: delete_table
|
23
|
+
<<: *bigquery
|
data/example/copy_table.yml
CHANGED
data/example/example.yml
CHANGED
@@ -1 +1 @@
|
|
1
|
-
migrate_table.yml
|
1
|
+
example/migrate_table.yml
|
data/example/insert_select.yml
CHANGED
data/example/migrate_table.yml
CHANGED
data/example/table_info.yml
CHANGED
data/exe/bq-migrate
CHANGED
@@ -1 +1 @@
|
|
1
|
-
bq_migrate
|
1
|
+
exe/bq_migrate
|
@@ -8,6 +8,7 @@ require_relative 'hash_util'
|
|
8
8
|
require 'google/apis/bigquery_v2'
|
9
9
|
require 'google/api_client/auth/key_utils'
|
10
10
|
require 'securerandom'
|
11
|
+
require 'inifile'
|
11
12
|
|
12
13
|
class BigqueryMigration
|
13
14
|
class BigqueryWrapper
|
@@ -20,92 +21,50 @@ class BigqueryMigration
|
|
20
21
|
def initialize(config, opts = {})
|
21
22
|
@config = HashUtil.deep_symbolize_keys(config)
|
22
23
|
@opts = HashUtil.deep_symbolize_keys(opts)
|
23
|
-
configure
|
24
|
-
end
|
25
|
-
|
26
|
-
def configure
|
27
|
-
if json_keyfile = config[:json_keyfile]
|
28
|
-
json_key =
|
29
|
-
case json_keyfile
|
30
|
-
when String
|
31
|
-
File.read(json_keyfile)
|
32
|
-
when Hash
|
33
|
-
json_keyfile[:content]
|
34
|
-
else
|
35
|
-
raise ConfigError.new "Unsupported json_keyfile type"
|
36
|
-
end
|
37
|
-
json_keyparams =
|
38
|
-
begin
|
39
|
-
case json_key
|
40
|
-
when String
|
41
|
-
HashUtil.deep_symbolize_keys(JSON.parse(json_key))
|
42
|
-
when Hash
|
43
|
-
HashUtil.deep_symbolize_keys(json_key)
|
44
|
-
end
|
45
|
-
rescue => e
|
46
|
-
raise ConfigError.new "json_keyfile is not a JSON file"
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
if json_keyparams
|
51
|
-
config[:project] ||= json_keyparams[:project_id]
|
52
|
-
config[:json_key] = json_keyparams.to_json
|
53
|
-
end
|
54
|
-
|
55
|
-
config[:retries] ||= 5
|
56
|
-
end
|
57
|
-
|
58
|
-
def project
|
59
|
-
@project ||= config[:project] || raise(ConfigError, '`project` is required.')
|
60
|
-
end
|
61
|
-
|
62
|
-
def dataset
|
63
|
-
@dataset ||= config[:dataset] || raise(ConfigError, '`dataset` is required.')
|
64
|
-
end
|
65
|
-
|
66
|
-
def table
|
67
|
-
@table ||= config[:table] || raise(ConfigError, '`table` is required.')
|
68
|
-
end
|
69
|
-
|
70
|
-
def job_status_polling_interval
|
71
|
-
@job_status_polling_interval ||= config[:job_status_polling_interval] || 5
|
72
|
-
end
|
73
|
-
|
74
|
-
def job_status_max_polling_time
|
75
|
-
@job_status_max_polling_time ||= config[:job_status_polling_time] || 3600
|
76
|
-
end
|
77
|
-
|
78
|
-
def dry_run?
|
79
|
-
@opts[:dry_run]
|
80
|
-
end
|
81
|
-
|
82
|
-
def head
|
83
|
-
dry_run? ? '(DRY-RUN) ' : '(EXECUTE) '
|
84
24
|
end
|
85
25
|
|
86
26
|
def client
|
87
27
|
return @cached_client if @cached_client && @cached_client_expiration > Time.now
|
88
28
|
|
89
29
|
client = Google::Apis::BigqueryV2::BigqueryService.new
|
90
|
-
client.request_options.retries =
|
30
|
+
client.request_options.retries = retries
|
31
|
+
client.client_options.open_timeout_sec = open_timeout_sec
|
91
32
|
if client.request_options.respond_to?(:timeout_sec)
|
92
|
-
client.
|
93
|
-
client.request_options.timeout_sec = config[:timeout_sec] || 300
|
33
|
+
client.client_options.timeout_sec = timeout_sec
|
94
34
|
else # google-api-ruby-client >= v0.11.0
|
95
|
-
if
|
35
|
+
if timeout_sec
|
96
36
|
logger.warn { "timeout_sec is deprecated in google-api-ruby-client >= v0.11.0. Use read_timeout_sec instead" }
|
97
37
|
end
|
98
|
-
client.client_options.
|
99
|
-
client.client_options.
|
100
|
-
client.client_options.read_timeout_sec = config[:read_timeout_sec] || config[:timeout_sec] || 300 # default: 60
|
38
|
+
client.client_options.send_timeout_sec = send_timeout_sec
|
39
|
+
client.client_options.read_timeout_sec = read_timeout_sec
|
101
40
|
end
|
102
41
|
logger.debug { "client_options: #{client.client_options.to_h}" }
|
103
42
|
logger.debug { "request_options: #{client.request_options.to_h}" }
|
104
43
|
|
105
44
|
scope = "https://www.googleapis.com/auth/bigquery"
|
106
45
|
|
107
|
-
|
108
|
-
|
46
|
+
case auth_method
|
47
|
+
when 'authorized_user'
|
48
|
+
auth = Signet::OAuth2::Client.new(
|
49
|
+
token_credential_uri: "https://accounts.google.com/o/oauth2/token",
|
50
|
+
audience: "https://accounts.google.com/o/oauth2/token",
|
51
|
+
scope: scope,
|
52
|
+
client_id: credentials['client_id'],
|
53
|
+
client_secret: credentials['client_secret'],
|
54
|
+
refresh_token: credentials['refresh_token']
|
55
|
+
)
|
56
|
+
auth.refresh!
|
57
|
+
when 'compute_engine'
|
58
|
+
auth = Google::Auth::GCECredentials.new
|
59
|
+
when 'service_account'
|
60
|
+
key = StringIO.new(credentials.to_json)
|
61
|
+
auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
|
62
|
+
when 'application_default'
|
63
|
+
auth = Google::Auth.get_application_default([scope])
|
64
|
+
else
|
65
|
+
raise ConfigError, "Unknown auth method: #{auth_method}"
|
66
|
+
end
|
67
|
+
|
109
68
|
client.authorization = auth
|
110
69
|
|
111
70
|
@cached_client_expiration = Time.now + 1800
|
@@ -747,5 +706,127 @@ class BigqueryMigration
|
|
747
706
|
|
748
707
|
result.merge!( before_columns: before_columns, after_columns: after_columns )
|
749
708
|
end
|
709
|
+
|
710
|
+
# For old version compatibility
|
711
|
+
# Use credentials_file or credentials instead
|
712
|
+
def json_key
|
713
|
+
if json_keyfile = config[:json_keyfile]
|
714
|
+
begin
|
715
|
+
case json_keyfile
|
716
|
+
when String
|
717
|
+
return JSON.parse(File.read(json_keyfile))
|
718
|
+
when Hash
|
719
|
+
return json_keyfile[:content]
|
720
|
+
else
|
721
|
+
raise ConfigError.new "Unsupported json_keyfile type"
|
722
|
+
end
|
723
|
+
rescue => e
|
724
|
+
raise ConfigError.new "json_keyfile is not a JSON file"
|
725
|
+
end
|
726
|
+
end
|
727
|
+
nil
|
728
|
+
end
|
729
|
+
|
730
|
+
# compute_engine, authorized_user, service_account, application_default
|
731
|
+
def auth_method
|
732
|
+
@auth_method ||= ENV['AUTH_METHOD'] || config.fetch(:auth_method, nil) || credentials['type'] || 'compute_engine'
|
733
|
+
end
|
734
|
+
|
735
|
+
def credentials
|
736
|
+
json_key || JSON.parse(config.fetch(:credentials, nil) || File.read(credentials_file))
|
737
|
+
end
|
738
|
+
|
739
|
+
def credentials_file
|
740
|
+
@credentials_file ||= File.expand_path(
|
741
|
+
# ref. https://developers.google.com/identity/protocols/application-default-credentials
|
742
|
+
ENV['GOOGLE_APPLICATION_CREDENTIALS'] ||
|
743
|
+
config.fetch(:credentials_file, nil) ||
|
744
|
+
(File.exist?(global_application_default_credentials_file) ? global_application_default_credentials_file : application_default_credentials_file)
|
745
|
+
)
|
746
|
+
end
|
747
|
+
|
748
|
+
def application_default_credentials_file
|
749
|
+
@application_default_credentials_file ||= File.expand_path("~/.config/gcloud/application_default_credentials.json")
|
750
|
+
end
|
751
|
+
|
752
|
+
def global_application_default_credentials_file
|
753
|
+
@global_application_default_credentials_file ||= '/etc/google/auth/application_default_credentials.json'
|
754
|
+
end
|
755
|
+
|
756
|
+
def config_default_file
|
757
|
+
File.expand_path('~/.config/gcloud/configurations/config_default')
|
758
|
+
end
|
759
|
+
|
760
|
+
def config_default
|
761
|
+
# {core:{account:'xxx',project:'xxx'},compute:{zone:'xxx'}}
|
762
|
+
@config_default ||= File.readable?(config_default_file) ? HashUtil.deep_symbolize_keys(IniFile.load(config_default_file).to_h) : {}
|
763
|
+
end
|
764
|
+
|
765
|
+
def service_account_default
|
766
|
+
(config_default[:core] || {})[:account]
|
767
|
+
end
|
768
|
+
|
769
|
+
def project_default
|
770
|
+
(config_default[:core] || {})[:project]
|
771
|
+
end
|
772
|
+
|
773
|
+
def zone_default
|
774
|
+
(config_default[:compute] || {})[:zone]
|
775
|
+
end
|
776
|
+
|
777
|
+
def service_account
|
778
|
+
@service_account ||= ENV['GOOGLE_SERVICE_ACCOUNT'] || config.fetch(:service_account, nil) || credentials['client_email'] || service_account_default
|
779
|
+
end
|
780
|
+
|
781
|
+
def retries
|
782
|
+
@retries ||= ENV['RETRIES'] || config.fetch(:retries, nil) || 5
|
783
|
+
end
|
784
|
+
|
785
|
+
# For google-api-client < 0.11.0. Deprecated
|
786
|
+
def timeout_sec
|
787
|
+
@timeout_sec ||= ENV['TIMEOUT_SEC'] || config.fetch(:timeout_sec, nil)
|
788
|
+
end
|
789
|
+
|
790
|
+
def send_timeout_sec
|
791
|
+
@send_timeout_sec ||= ENV['SEND_TIMEOUT_SEC'] || config.fetch(:send_timeout_sec, nil) || 60
|
792
|
+
end
|
793
|
+
|
794
|
+
def read_timeout_sec
|
795
|
+
@read_timeout_sec ||= ENV['READ_TIMEOUT_SEC'] || config.fetch(:read_timeout_sec, nil) || timeout_sec || 300
|
796
|
+
end
|
797
|
+
|
798
|
+
def open_timeout_sec
|
799
|
+
@open_timeout_sec ||= ENV['OPEN_TIMEOUT_SEC'] || config.fetch(:open_timeout_sec, nil) || 300
|
800
|
+
end
|
801
|
+
|
802
|
+
def project
|
803
|
+
@project ||= ENV['GOOGLE_PROJECT'] || config.fetch(:project, nil) || credentials['project_id']
|
804
|
+
@project ||= credentials['client_email'].chomp('.iam.gserviceaccount.com').split('@').last if credentials['client_email']
|
805
|
+
@project ||= project_default || raise(ConfigError, '`project` is required.')
|
806
|
+
end
|
807
|
+
|
808
|
+
def dataset
|
809
|
+
@dataset ||= config[:dataset] || raise(ConfigError, '`dataset` is required.')
|
810
|
+
end
|
811
|
+
|
812
|
+
def table
|
813
|
+
@table ||= config[:table] || raise(ConfigError, '`table` is required.')
|
814
|
+
end
|
815
|
+
|
816
|
+
def job_status_polling_interval
|
817
|
+
@job_status_polling_interval ||= config[:job_status_polling_interval] || 5
|
818
|
+
end
|
819
|
+
|
820
|
+
def job_status_max_polling_time
|
821
|
+
@job_status_max_polling_time ||= config[:job_status_polling_time] || 3600
|
822
|
+
end
|
823
|
+
|
824
|
+
def dry_run?
|
825
|
+
@opts[:dry_run]
|
826
|
+
end
|
827
|
+
|
828
|
+
def head
|
829
|
+
dry_run? ? '(DRY-RUN) ' : '(EXECUTE) '
|
830
|
+
end
|
750
831
|
end
|
751
832
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bigquery_migration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-04-
|
11
|
+
date: 2017-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: google-api-client
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: inifile
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: bundler
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -157,6 +171,7 @@ files:
|
|
157
171
|
- bigquery_migration.gemspec
|
158
172
|
- bin/console
|
159
173
|
- bin/setup
|
174
|
+
- example/application_default.yml
|
160
175
|
- example/copy_table.yml
|
161
176
|
- example/example.yml
|
162
177
|
- example/insert_select.yml
|
@@ -199,7 +214,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
199
214
|
version: '0'
|
200
215
|
requirements: []
|
201
216
|
rubyforge_project:
|
202
|
-
rubygems_version: 2.
|
217
|
+
rubygems_version: 2.6.11
|
203
218
|
signing_key:
|
204
219
|
specification_version: 4
|
205
220
|
summary: Migrate BigQuery table schema
|