bigquery_migration 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4cc3b6f5ad69666d2dec7c19ba97cbafecd8f74a
4
- data.tar.gz: 490f4b4c6dd4801771de9296d9907a90ea5f43b2
3
+ metadata.gz: 97e77d9eca83b83c064a4e5328dbd3d776633290
4
+ data.tar.gz: 8de9b03871570d5558e29a16d84b0fbef6873fc7
5
5
  SHA512:
6
- metadata.gz: b486f63c1e1119fc8061308d073559d88cf006dcb935c73dd023b96ae2b783a58ef7f4f2f4717ed30e72f6536c8c46335430f0e0fa993343fbf1781397ef3e61
7
- data.tar.gz: 2c36c1a2a889d7a8137139bd6ec1ca1900b2e5d8129ab6b3a073c206b5ca2831cd3fbb912c8b16fe84302baba63a6c0b90e607c7da7dc8aa7e301763fa22dfc6
6
+ metadata.gz: 7de67b97655404b7f4a9f209484932252ed9d64da3e5535830012ce22cf6fcb3ed1283f13e8859017072bcaff43627d069f9b03851d97b00157c262b82351468
7
+ data.tar.gz: f82e01cb2b9d25c24344dd839d56807039c7c24fdf4ea055b308a02b84feecb49997ce73c7faf698b6c6345a9c3435350be8fb416d2a53894918fad6836c502e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.3.0 (2017/04/26)
2
+
3
+ Enhancements:
4
+
5
+ * Support more authentication methods such as oauth, compute_engine, application_default
6
+
1
7
  # 0.2.2 (2017/04/04)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -95,13 +95,40 @@ This tool has an advantage that it is **faster** than reloading data entirely.
95
95
 
96
96
  ### Run example:
97
97
 
98
- Prepare `example/your-project-000.json`, then
98
+ **Service Account**
99
+
100
+ Prepare your service account json at `example/your-project-000.json`, then
99
101
 
100
102
  ```
101
103
  $ bundle exec bq_migrate run example/example.yml # dry-run
102
104
  $ bundle exec bq_migrate run example/example.yml --exec
103
105
  ```
104
106
 
107
+ **OAuth**
108
+
109
+ Install gcloud into your development environment:
110
+
111
+ ```
112
+ curl https://sdk.cloud.google.com | bash
113
+ gcloud init
114
+ gcloud auth login
115
+ gcloud auth application-default login
116
+ gcloud config set project <GCP_PROJECT_NAME>
117
+ ```
118
+
119
+ Make sure `gcloud` works
120
+
121
+ ```
122
+ gcloud compute instances list
123
+ ```
124
+
125
+ Run as:
126
+
127
+ ```
128
+ $ bundle exec bq_migrate run example/application_default.yml # dry-run
129
+ $ bundle exec bq_migrate run example/application_default.yml --exec
130
+ ```
131
+
105
132
  ### Run test:
106
133
 
107
134
  ```
@@ -110,7 +137,7 @@ $ bundle exec rake test
110
137
 
111
138
  To run tests which directly connect to BigQuery, prepare `example/your-project-000.json`, then
112
139
 
113
- ````
140
+ ```
114
141
  $ bundle exec rake test
115
142
  ```
116
143
 
@@ -22,6 +22,7 @@ Gem::Specification.new do |spec|
22
22
  spec.add_dependency "google-api-client"
23
23
  spec.add_dependency "tzinfo"
24
24
  spec.add_dependency "thor"
25
+ spec.add_dependency "inifile"
25
26
 
26
27
  spec.add_development_dependency "bundler", "~> 1.11"
27
28
  spec.add_development_dependency "rake", "~> 10.0"
@@ -0,0 +1,23 @@
1
+ bigquery: &bigquery
2
+ # project: read from ~/.config/gcloud/configurations/config_default
3
+ dataset: your_dataset_name
4
+ table: your_table_name
5
+
6
+ actions:
7
+ - action: create_dataset
8
+ <<: *bigquery
9
+ - action: migrate_table
10
+ <<: *bigquery
11
+ columns:
12
+ - { name: 'timestamp', type: 'TIMESTAMP' }
13
+ - name: 'record'
14
+ type: 'RECORD'
15
+ fields:
16
+ - { name: 'string', type: 'STRING' }
17
+ - { name: 'integer', type: 'INTEGER' }
18
+ - { name: 'bytes', type: 'BYTES' }
19
+ - action: migrate_table
20
+ <<: *bigquery
21
+ schema_file: example/schema.json
22
+ - action: delete_table
23
+ <<: *bigquery
@@ -1,5 +1,5 @@
1
1
  bigquery: &bigquery
2
- json_keyfile: example/your-project-000.json
2
+ credentials_file: example/your-project-000.json
3
3
  dataset: your_dataset_name
4
4
  table: your_table_name
5
5
 
data/example/example.yml CHANGED
@@ -1 +1 @@
1
- migrate_table.yml
1
+ example/migrate_table.yml
@@ -1,5 +1,5 @@
1
1
  bigquery: &bigquery
2
- json_keyfile: example/your-project-000.json
2
+ credentials_file: example/your-project-000.json
3
3
  dataset: your_dataset_name
4
4
  table: your_table_name
5
5
 
@@ -1,5 +1,5 @@
1
1
  bigquery: &bigquery
2
- json_keyfile: example/your-project-000.json
2
+ credentials_file: example/your-project-000.json
3
3
  dataset: your_dataset_name
4
4
  table: your_table_name
5
5
 
@@ -1,5 +1,5 @@
1
1
  bigquery: &bigquery
2
- json_keyfile: example/your-project-000.json
2
+ credentials_file: example/your-project-000.json
3
3
  dataset: your_dataset_name
4
4
  table: your_table_name
5
5
 
@@ -1,5 +1,5 @@
1
1
  bigquery: &bigquery
2
- json_keyfile: example/your-project-000.json
2
+ credentials_file: example/your-project-000.json
3
3
  dataset: your_dataset_name
4
4
  table: your_table_name
5
5
 
data/exe/bq-migrate CHANGED
@@ -1 +1 @@
1
- bq_migrate
1
+ exe/bq_migrate
@@ -8,6 +8,7 @@ require_relative 'hash_util'
8
8
  require 'google/apis/bigquery_v2'
9
9
  require 'google/api_client/auth/key_utils'
10
10
  require 'securerandom'
11
+ require 'inifile'
11
12
 
12
13
  class BigqueryMigration
13
14
  class BigqueryWrapper
@@ -20,92 +21,50 @@ class BigqueryMigration
20
21
  def initialize(config, opts = {})
21
22
  @config = HashUtil.deep_symbolize_keys(config)
22
23
  @opts = HashUtil.deep_symbolize_keys(opts)
23
- configure
24
- end
25
-
26
- def configure
27
- if json_keyfile = config[:json_keyfile]
28
- json_key =
29
- case json_keyfile
30
- when String
31
- File.read(json_keyfile)
32
- when Hash
33
- json_keyfile[:content]
34
- else
35
- raise ConfigError.new "Unsupported json_keyfile type"
36
- end
37
- json_keyparams =
38
- begin
39
- case json_key
40
- when String
41
- HashUtil.deep_symbolize_keys(JSON.parse(json_key))
42
- when Hash
43
- HashUtil.deep_symbolize_keys(json_key)
44
- end
45
- rescue => e
46
- raise ConfigError.new "json_keyfile is not a JSON file"
47
- end
48
- end
49
-
50
- if json_keyparams
51
- config[:project] ||= json_keyparams[:project_id]
52
- config[:json_key] = json_keyparams.to_json
53
- end
54
-
55
- config[:retries] ||= 5
56
- end
57
-
58
- def project
59
- @project ||= config[:project] || raise(ConfigError, '`project` is required.')
60
- end
61
-
62
- def dataset
63
- @dataset ||= config[:dataset] || raise(ConfigError, '`dataset` is required.')
64
- end
65
-
66
- def table
67
- @table ||= config[:table] || raise(ConfigError, '`table` is required.')
68
- end
69
-
70
- def job_status_polling_interval
71
- @job_status_polling_interval ||= config[:job_status_polling_interval] || 5
72
- end
73
-
74
- def job_status_max_polling_time
75
- @job_status_max_polling_time ||= config[:job_status_polling_time] || 3600
76
- end
77
-
78
- def dry_run?
79
- @opts[:dry_run]
80
- end
81
-
82
- def head
83
- dry_run? ? '(DRY-RUN) ' : '(EXECUTE) '
84
24
  end
85
25
 
86
26
  def client
87
27
  return @cached_client if @cached_client && @cached_client_expiration > Time.now
88
28
 
89
29
  client = Google::Apis::BigqueryV2::BigqueryService.new
90
- client.request_options.retries = config[:retries]
30
+ client.request_options.retries = retries
31
+ client.client_options.open_timeout_sec = open_timeout_sec
91
32
  if client.request_options.respond_to?(:timeout_sec)
92
- client.request_options.open_timeout_sec = config[:open_timeout_sec] || 300
93
- client.request_options.timeout_sec = config[:timeout_sec] || 300
33
+ client.client_options.timeout_sec = timeout_sec
94
34
  else # google-api-ruby-client >= v0.11.0
95
- if config[:timeout_sec]
35
+ if timeout_sec
96
36
  logger.warn { "timeout_sec is deprecated in google-api-ruby-client >= v0.11.0. Use read_timeout_sec instead" }
97
37
  end
98
- client.client_options.open_timeout_sec = config[:open_timeout_sec] || 300 # default: 60
99
- client.client_options.send_timeout_sec = config[:send_timeout_sec] || 300 # default: 120
100
- client.client_options.read_timeout_sec = config[:read_timeout_sec] || config[:timeout_sec] || 300 # default: 60
38
+ client.client_options.send_timeout_sec = send_timeout_sec
39
+ client.client_options.read_timeout_sec = read_timeout_sec
101
40
  end
102
41
  logger.debug { "client_options: #{client.client_options.to_h}" }
103
42
  logger.debug { "request_options: #{client.request_options.to_h}" }
104
43
 
105
44
  scope = "https://www.googleapis.com/auth/bigquery"
106
45
 
107
- key = StringIO.new(config[:json_key])
108
- auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
46
+ case auth_method
47
+ when 'authorized_user'
48
+ auth = Signet::OAuth2::Client.new(
49
+ token_credential_uri: "https://accounts.google.com/o/oauth2/token",
50
+ audience: "https://accounts.google.com/o/oauth2/token",
51
+ scope: scope,
52
+ client_id: credentials['client_id'],
53
+ client_secret: credentials['client_secret'],
54
+ refresh_token: credentials['refresh_token']
55
+ )
56
+ auth.refresh!
57
+ when 'compute_engine'
58
+ auth = Google::Auth::GCECredentials.new
59
+ when 'service_account'
60
+ key = StringIO.new(credentials.to_json)
61
+ auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
62
+ when 'application_default'
63
+ auth = Google::Auth.get_application_default([scope])
64
+ else
65
+ raise ConfigError, "Unknown auth method: #{auth_method}"
66
+ end
67
+
109
68
  client.authorization = auth
110
69
 
111
70
  @cached_client_expiration = Time.now + 1800
@@ -747,5 +706,127 @@ class BigqueryMigration
747
706
 
748
707
  result.merge!( before_columns: before_columns, after_columns: after_columns )
749
708
  end
709
+
710
+ # For old version compatibility
711
+ # Use credentials_file or credentials instead
712
+ def json_key
713
+ if json_keyfile = config[:json_keyfile]
714
+ begin
715
+ case json_keyfile
716
+ when String
717
+ return JSON.parse(File.read(json_keyfile))
718
+ when Hash
719
+ return json_keyfile[:content]
720
+ else
721
+ raise ConfigError.new "Unsupported json_keyfile type"
722
+ end
723
+ rescue => e
724
+ raise ConfigError.new "json_keyfile is not a JSON file"
725
+ end
726
+ end
727
+ nil
728
+ end
729
+
730
+ # compute_engine, authorized_user, service_account, application_default
731
+ def auth_method
732
+ @auth_method ||= ENV['AUTH_METHOD'] || config.fetch(:auth_method, nil) || credentials['type'] || 'compute_engine'
733
+ end
734
+
735
+ def credentials
736
+ json_key || JSON.parse(config.fetch(:credentials, nil) || File.read(credentials_file))
737
+ end
738
+
739
+ def credentials_file
740
+ @credentials_file ||= File.expand_path(
741
+ # ref. https://developers.google.com/identity/protocols/application-default-credentials
742
+ ENV['GOOGLE_APPLICATION_CREDENTIALS'] ||
743
+ config.fetch(:credentials_file, nil) ||
744
+ (File.exist?(global_application_default_credentials_file) ? global_application_default_credentials_file : application_default_credentials_file)
745
+ )
746
+ end
747
+
748
+ def application_default_credentials_file
749
+ @application_default_credentials_file ||= File.expand_path("~/.config/gcloud/application_default_credentials.json")
750
+ end
751
+
752
+ def global_application_default_credentials_file
753
+ @global_application_default_credentials_file ||= '/etc/google/auth/application_default_credentials.json'
754
+ end
755
+
756
+ def config_default_file
757
+ File.expand_path('~/.config/gcloud/configurations/config_default')
758
+ end
759
+
760
+ def config_default
761
+ # {core:{account:'xxx',project:'xxx'},compute:{zone:'xxx'}}
762
+ @config_default ||= File.readable?(config_default_file) ? HashUtil.deep_symbolize_keys(IniFile.load(config_default_file).to_h) : {}
763
+ end
764
+
765
+ def service_account_default
766
+ (config_default[:core] || {})[:account]
767
+ end
768
+
769
+ def project_default
770
+ (config_default[:core] || {})[:project]
771
+ end
772
+
773
+ def zone_default
774
+ (config_default[:compute] || {})[:zone]
775
+ end
776
+
777
+ def service_account
778
+ @service_account ||= ENV['GOOGLE_SERVICE_ACCOUNT'] || config.fetch(:service_account, nil) || credentials['client_email'] || service_account_default
779
+ end
780
+
781
+ def retries
782
+ @retries ||= ENV['RETRIES'] || config.fetch(:retries, nil) || 5
783
+ end
784
+
785
+ # For google-api-client < 0.11.0. Deprecated
786
+ def timeout_sec
787
+ @timeout_sec ||= ENV['TIMEOUT_SEC'] || config.fetch(:timeout_sec, nil)
788
+ end
789
+
790
+ def send_timeout_sec
791
+ @send_timeout_sec ||= ENV['SEND_TIMEOUT_SEC'] || config.fetch(:send_timeout_sec, nil) || 60
792
+ end
793
+
794
+ def read_timeout_sec
795
+ @read_timeout_sec ||= ENV['READ_TIMEOUT_SEC'] || config.fetch(:read_timeout_sec, nil) || timeout_sec || 300
796
+ end
797
+
798
+ def open_timeout_sec
799
+ @open_timeout_sec ||= ENV['OPEN_TIMEOUT_SEC'] || config.fetch(:open_timeout_sec, nil) || 300
800
+ end
801
+
802
+ def project
803
+ @project ||= ENV['GOOGLE_PROJECT'] || config.fetch(:project, nil) || credentials['project_id']
804
+ @project ||= credentials['client_email'].chomp('.iam.gserviceaccount.com').split('@').last if credentials['client_email']
805
+ @project ||= project_default || raise(ConfigError, '`project` is required.')
806
+ end
807
+
808
+ def dataset
809
+ @dataset ||= config[:dataset] || raise(ConfigError, '`dataset` is required.')
810
+ end
811
+
812
+ def table
813
+ @table ||= config[:table] || raise(ConfigError, '`table` is required.')
814
+ end
815
+
816
+ def job_status_polling_interval
817
+ @job_status_polling_interval ||= config[:job_status_polling_interval] || 5
818
+ end
819
+
820
+ def job_status_max_polling_time
821
+ @job_status_max_polling_time ||= config[:job_status_polling_time] || 3600
822
+ end
823
+
824
+ def dry_run?
825
+ @opts[:dry_run]
826
+ end
827
+
828
+ def head
829
+ dry_run? ? '(DRY-RUN) ' : '(EXECUTE) '
830
+ end
750
831
  end
751
832
  end
@@ -1,3 +1,3 @@
1
1
  class BigqueryMigration
2
- VERSION = "0.2.2"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bigquery_migration
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-04-04 00:00:00.000000000 Z
11
+ date: 2017-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: google-api-client
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: inifile
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: bundler
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -157,6 +171,7 @@ files:
157
171
  - bigquery_migration.gemspec
158
172
  - bin/console
159
173
  - bin/setup
174
+ - example/application_default.yml
160
175
  - example/copy_table.yml
161
176
  - example/example.yml
162
177
  - example/insert_select.yml
@@ -199,7 +214,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
199
214
  version: '0'
200
215
  requirements: []
201
216
  rubyforge_project:
202
- rubygems_version: 2.5.2
217
+ rubygems_version: 2.6.11
203
218
  signing_key:
204
219
  specification_version: 4
205
220
  summary: Migrate BigQuery table schema