embulk-output-bigquery 0.5.0 → 0.6.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -3
- data/README.md +57 -33
- data/embulk-output-bigquery.gemspec +1 -1
- data/example/config_append_direct_schema_update_options.yml +1 -1
- data/example/config_client_options.yml +1 -1
- data/example/config_csv.yml +1 -1
- data/example/config_delete_in_advance.yml +1 -1
- data/example/config_delete_in_advance_field_partitioned_table.yml +1 -1
- data/example/config_delete_in_advance_partitioned_table.yml +1 -1
- data/example/config_expose_errors.yml +1 -1
- data/example/config_gcs.yml +1 -1
- data/example/config_guess_from_embulk_schema.yml +1 -1
- data/example/config_guess_with_column_options.yml +1 -1
- data/example/config_max_threads.yml +1 -1
- data/example/config_min_ouput_tasks.yml +1 -1
- data/example/config_mode_append.yml +1 -1
- data/example/config_mode_append_direct.yml +1 -1
- data/example/config_payload_column.yml +1 -1
- data/example/config_payload_column_index.yml +1 -1
- data/example/config_progress_log_interval.yml +1 -1
- data/example/config_replace.yml +1 -1
- data/example/config_replace_backup.yml +1 -1
- data/example/config_replace_backup_field_partitioned_table.yml +1 -1
- data/example/config_replace_backup_partitioned_table.yml +1 -1
- data/example/config_replace_field_partitioned_table.yml +1 -1
- data/example/config_replace_partitioned_table.yml +1 -1
- data/example/config_replace_schema_update_options.yml +1 -1
- data/example/config_skip_file_generation.yml +1 -1
- data/example/config_table_strftime.yml +1 -1
- data/example/config_template_table.yml +1 -1
- data/example/config_with_rehearsal.yml +1 -1
- data/lib/embulk/output/bigquery.rb +8 -17
- data/lib/embulk/output/bigquery/auth.rb +35 -0
- data/lib/embulk/output/bigquery/google_client.rb +3 -34
- data/test/test_bigquery_client.rb +1 -5
- data/test/test_configure.rb +4 -12
- data/test/test_transaction.rb +5 -6
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3c0942035a81c9180260f8329ccaa5ba99de2185ea5f9ec5f1b3ffe87d5e8a73
+  data.tar.gz: c543a1b9f1278cf5d543a96bd3b8c465b2727b03df67d1e1726bef40135a1d42
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 23559e485346f2f8d65fa76aef2284c8b8c682257ee317b2088b30515e6e2a2936cc3c5b8ab5c3020ee9f9790e735bc48c41bb8e5d30fc777174d681796128c1
+  data.tar.gz: 336988c0afb153c0b9b7532bf9d85523bb9e9641eca7a79b6ab491be5567e2be9204c47417b28a8d42bbae2907cb6892b1ce5abb98261564c696b15478deb3ad
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,17 @@
+## 0.6.0 - 2019-08-11
+
+Cleanup `auth_method`:
+
+* [enhancement] Support `auth_method: authorized_user` (OAuth)
+* [incompatibility change] Rename `auth_method: json_key` to `auth_method: service_account` (`json_key` is kept for backward compatibility)
+* [incompatibility change] Remove deprecated `auth_method: private_key` (p12 key)
+* [incompatibility change] Change the default `auth_method` to `application_default` from `private_key`.
+
 ## 0.5.0 - 2019-08-10
 
-* [incompatibility change] Drop deprecated
-* [incompatibility change] Drop
-* [incompatibility change] Change default value of `
+* [incompatibility change] Drop deprecated `time_partitioning`.`require_partition_filter`
+* [incompatibility change] Drop `prevent_duplicate_insert` which has no use-case now
+* [incompatibility change] Change default value of `auto_create_table` to `true` from `false`
   * Modes `replace`, `replace_backup`, `append`, `delete_in_advance`, that is, except `append_direct` requires `auto_create_table: true`.
 
 ## 0.4.14 - 2019-08-10
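With 0.6.0 the default `auth_method` becomes `application_default`, which relies on Application Default Credentials resolved by the googleauth gem. A minimal standalone sketch of that resolution (the scope URL is the standard BigQuery scope; this snippet is illustrative, not part of the plugin):

```ruby
require 'googleauth'

# Application Default Credentials lookup order: GOOGLE_APPLICATION_CREDENTIALS,
# then gcloud's well-known credentials file, then the GCE metadata server.
scope = 'https://www.googleapis.com/auth/bigquery'
credentials = Google::Auth.get_application_default([scope])
credentials.fetch_access_token!   # raises if no credentials can be resolved
```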
data/README.md
CHANGED
@@ -29,12 +29,10 @@ OAuth flow for installed applications.
 
 | name                                  | type        | required?  | default                  | description            |
 |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
-| mode                                  | string      | optional   | "append"                 | See [Mode](#mode)
-| auth_method                           | string      | optional   | "
-|
-|
-| json_keyfile                          | string      | required when auth_method is json_key | | Fullpath of json key |
-| project                               | string      | required if json_keyfile is not given | | project_id |
+| mode                                  | string      | optional   | "append"                 | See [Mode](#mode)      |
+| auth_method                           | string      | optional   | "application\_default"   | See [Authentication](#authentication) |
+| json_keyfile                          | string      | optional   |                          | keyfile path or `content` |
+| project                               | string      | required unless service\_account's `json_keyfile` is given. | | project\_id |
 | dataset                               | string      | required   |                          | dataset                |
 | location                              | string      | optional   | nil                      | geographic location of dataset. See [Location](#location) |
 | table                                 | string      | required   |                          | table name, or table name with a partition decorator such as `table_name$20160929`|
@@ -108,9 +106,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 out:
   type: bigquery
   mode: append
-  auth_method:
-
-  p12_keyfile: /path/to/p12_keyfile.p12
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
   project: your-project-000
   dataset: your_dataset_name
   table: your_table_name
@@ -118,7 +115,7 @@ out:
   source_format: NEWLINE_DELIMITED_JSON
 ```
 
-###
+### Location
 
 The geographic location of the dataset. Required except for US and EU.
 
@@ -126,7 +123,7 @@ GCS bucket should be in same region when you use `gcs_bucket`.
 
 See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
 
-###
+### Mode
 
 5 modes are provided.
 
@@ -165,53 +162,69 @@ NOTE: BigQuery does not support replacing (actually, copying into) a non-partiti
 
 ### Authentication
 
-There are
+There are four authentication methods
 
-1.
-
-
+1. `service_account` (or `json_key` for backward compatibility)
+1. `authorized_user`
+1. `compute_engine`
+1. `application_default`
 
-####
+#### service\_account (or json\_key)
 
-
-download its
+Use GCP service account credentials.
+You first need to create a service account, download its json key and deploy the key with embulk.
 
 ```yaml
 out:
   type: bigquery
-  auth_method:
-
-  p12_keyfile: /path/to/p12_keyfile.p12
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
 ```
 
-
+You can also embed contents of `json_keyfile` at config.yml.
+
+```yaml
+out:
+  type: bigquery
+  auth_method: service_account
+  json_keyfile:
+    content: |
+      {
+          "private_key_id": "123456789",
+          "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
+          "client_email": "..."
+      }
+```
 
-
-
+#### authorized\_user
+
+Use Google user credentials.
+You can get your credentials at `~/.config/gcloud/application_default_credentials.json` by running `gcloud auth login`.
 
 ```yaml
 out:
   type: bigquery
-  auth_method:
-  json_keyfile: /path/to/
+  auth_method: authorized_user
+  json_keyfile: /path/to/credentials.json
 ```
 
-You can also embed contents of json_keyfile at config.yml.
+You can also embed contents of `json_keyfile` at config.yml.
 
 ```yaml
 out:
   type: bigquery
-  auth_method:
+  auth_method: service_account
   json_keyfile:
     content: |
       {
-
-
-
-
+        "client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
+        "client_secret":"xxxxxxxxxxx",
+        "refresh_token":"xxxxxxxxxxx",
+        "type":"authorized_user"
+      }
 ```
 
-####
+#### compute\_engine
 
 On the other hand, you don't need to explicitly create a service account for embulk when you
 run embulk in Google Compute Engine. In this third authentication method, you need to
@@ -224,6 +237,17 @@ out:
   auth_method: compute_engine
 ```
 
+#### application\_default
+
+Use Application Default Credentials (ADC).
+See https://cloud.google.com/docs/authentication/production for details.
+
+```yaml
+out:
+  type: bigquery
+  auth_method: application_default
+```
+
 ### Table id formatting
 
 `table` and option accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name          = "embulk-output-bigquery"
-  spec.version       = "0.5.0"
+  spec.version       = "0.6.0"
   spec.authors       = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary       = "Google BigQuery output plugin for Embulk"
   spec.description   = "Embulk plugin that insert records to Google BigQuery."
data/example/config_csv.yml
CHANGED
data/example/config_gcs.yml
CHANGED
data/example/config_replace.yml
CHANGED
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -23,7 +23,7 @@ module Embulk
        # @return JSON string
        def self.load(v)
          if v.is_a?(String) # path
-           File.read(v)
+           File.read(File.expand_path(v))
          elsif v.is_a?(Hash)
            v['content']
          end
@@ -33,9 +33,7 @@ module Embulk
      def self.configure(config, schema, task_count)
        task = {
          'mode' => config.param('mode', :string, :default => 'append'),
-         'auth_method' => config.param('auth_method', :string, :default => '
-         'service_account_email' => config.param('service_account_email', :string, :default => nil),
-         'p12_keyfile' => config.param('p12_keyfile', :string, :default => nil),
+         'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
          'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
          'project' => config.param('project', :string, :default => nil),
          'dataset' => config.param('dataset', :string),
@@ -125,28 +123,21 @@ module Embulk
        end
 
        task['auth_method'] = task['auth_method'].downcase
-       unless %w[
-         raise ConfigError.new "`auth_method` must be one of
+       unless %w[json_key service_account authorized_user compute_engine application_default].include?(task['auth_method'])
+         raise ConfigError.new "`auth_method` must be one of service_account (or json_key), authorized_user, compute_engine, application_default"
        end
-       if task['auth_method'] == '
-         raise ConfigError.new "`
-       end
-       if task['auth_method'] == 'json_key' and task['json_keyfile'].nil?
-         raise ConfigError.new "`json_keyfile` is required for auth_method json_key"
+       if (task['auth_method'] == 'service_account' or task['auth_method'] == 'json_key') and task['json_keyfile'].nil?
+         raise ConfigError.new "`json_keyfile` is required for auth_method: service_account (or json_key)"
        end
 
-       jsonkey_params = nil
       if task['json_keyfile']
          begin
-
+           json_key = JSON.parse(task['json_keyfile'])
+           task['project'] ||= json_key['project_id']
          rescue => e
            raise ConfigError.new "json_keyfile is not a JSON file"
          end
        end
-
-       if jsonkey_params
-         task['project'] ||= jsonkey_params['project_id']
-       end
        if task['project'].nil?
          raise ConfigError.new "Required field \"project\" is not set"
        end
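Two practical consequences of the configure change above, sketched in plain Ruby (the helper name and key path are illustrative, not part of the plugin): `json_keyfile` now always carries the key contents (a path string is read from disk, a hash supplies them inline via `content`), and `project` falls back to the key's `project_id`.

```ruby
require 'json'

# LocalFile-style loading as in the hunk above: a String is treated as a
# path and read from disk, a Hash supplies the contents inline.
def load_local_file(v)
  if v.is_a?(String)
    File.read(File.expand_path(v))
  elsif v.is_a?(Hash)
    v['content']
  end
end

json_keyfile = load_local_file('/path/to/json_keyfile.json')  # hypothetical path
json_key     = JSON.parse(json_keyfile)
project      = json_key['project_id']  # used when `project` is not set explicitly
```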
data/lib/embulk/output/bigquery/auth.rb
ADDED
@@ -0,0 +1,35 @@
+require 'googleauth'
+
+module Embulk
+  module Output
+    class Bigquery < OutputPlugin
+      class Auth
+
+        attr_reader :auth_method, :json_key, :scope
+
+        def initialize(task, scope)
+          @auth_method = task['auth_method']
+          @json_key = task['json_keyfile']
+          @scope = scope
+        end
+
+        def authenticate
+          case auth_method
+          when 'authorized_user'
+            key = StringIO.new(json_key)
+            return Google::Auth::UserRefreshCredentials.make_creds(json_key_io: key, scope: scope)
+          when 'compute_engine'
+            return Google::Auth::GCECredentials.new
+          when 'service_account', 'json_key' # json_key is for backward compatibility
+            key = StringIO.new(json_key)
+            return Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
+          when 'application_default'
+            return Google::Auth.get_application_default([scope])
+          else
+            raise ConfigError.new("Unknown auth method: #{auth_method}")
+          end
+        end
+      end
+    end
+  end
+end
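A minimal usage sketch of the new `Auth` class. It assumes an Embulk runtime is already loaded (the class reopens `Bigquery < OutputPlugin`); the scope is the standard BigQuery scope and the keyfile path is illustrative:

```ruby
# Assumes the Embulk runtime and this plugin are loaded, so that
# Embulk::Output::Bigquery::Auth is defined.
scope = 'https://www.googleapis.com/auth/bigquery'
task  = {
  'auth_method'  => 'service_account',
  'json_keyfile' => File.read('/path/to/json_keyfile.json'),  # key contents, not a path
}

auth        = Embulk::Output::Bigquery::Auth.new(task, scope)
credentials = auth.authenticate  # a Google::Auth::ServiceAccountCredentials object
```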
data/lib/embulk/output/bigquery/google_client.rb
CHANGED
@@ -1,4 +1,4 @@
-
+require_relative 'auth'
 
 module Embulk
   module Output
@@ -14,6 +14,7 @@ module Embulk
      def initialize(task, scope, client_class)
        @task = task
        @scope = scope
+       @auth = Auth.new(task, scope)
        @client_class = client_class
      end
 
@@ -37,39 +38,7 @@ module Embulk
        Embulk.logger.debug { "embulk-output-bigquery: client_options: #{client.client_options.to_h}" }
        Embulk.logger.debug { "embulk-output-bigquery: request_options: #{client.request_options.to_h}" }
 
-
-        when 'private_key'
-          private_key_passphrase = 'notasecret'
-          key = Google::APIClient::KeyUtils.load_from_pkcs12(@task['p12_keyfile'], private_key_passphrase)
-          auth = Signet::OAuth2::Client.new(
-            token_credential_uri: "https://accounts.google.com/o/oauth2/token",
-            audience: "https://accounts.google.com/o/oauth2/token",
-            scope: @scope,
-            issuer: @task['service_account_email'],
-            signing_key: key)
-
-        when 'compute_engine'
-          auth = Google::Auth::GCECredentials.new
-
-        when 'json_key'
-          json_key = @task['json_keyfile']
-          if File.exist?(json_key)
-            auth = File.open(json_key) do |f|
-              Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: @scope)
-            end
-          else
-            key = StringIO.new(json_key)
-            auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: @scope)
-          end
-
-        when 'application_default'
-          auth = Google::Auth.get_application_default([@scope])
-
-        else
-          raise ConfigError, "Unknown auth method: #{@task['auth_method']}"
-        end
-
-        client.authorization = auth
+        client.authorization = @auth.authenticate
 
        @cached_client_expiration = Time.now + 1800
        @cached_client = client
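The net effect of this refactor: credential construction lives in `Auth#authenticate`, and the API client just consumes whatever it returns. Sketched below for the `authorized_user` case with the google-api-client gem (the credentials path and scope are illustrative assumptions, not the plugin's exact code):

```ruby
require 'stringio'
require 'googleauth'
require 'google/apis/bigquery_v2'

scope    = 'https://www.googleapis.com/auth/bigquery'
# Hypothetical location of a user credentials JSON (e.g. written by gcloud).
json_key = File.read(File.expand_path('~/.config/gcloud/application_default_credentials.json'))

# What Auth#authenticate does for auth_method: authorized_user.
credentials = Google::Auth::UserRefreshCredentials.make_creds(
  json_key_io: StringIO.new(json_key), scope: scope
)

client = Google::Apis::BigqueryV2::BigqueryService.new
client.authorization = credentials
```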
data/test/test_bigquery_client.rb
CHANGED
@@ -32,7 +32,7 @@ else
      'dataset' => 'your_dataset_name',
      'table' => 'your_table_name',
      'auth_method' => 'json_key',
-     'json_keyfile' => JSON_KEYFILE,
+     'json_keyfile' => File.read(JSON_KEYFILE),
      'retries' => 3,
      'timeout_sec' => 300,
      'open_timeout_sec' => 300,
@@ -61,10 +61,6 @@ else
    def test_json_keyfile
      assert_nothing_raised { BigqueryClient.new(least_task, schema).client }
    end
-
-    def test_p12_keyfile
-      # pending
-    end
  end
 
  sub_test_case "create_dataset" do
data/test/test_configure.rb
CHANGED
@@ -18,10 +18,9 @@ module Embulk
 
    def least_config
      DataSource.new({
-       'project'
-       'dataset'
-       'table'
-       'p12_keyfile' => __FILE__, # fake
+       'project' => 'your_project_name',
+       'dataset' => 'your_dataset_name',
+       'table' => 'your_table_name',
      })
    end
 
@@ -43,9 +42,7 @@ module Embulk
    def test_configure_default
      task = Bigquery.configure(least_config, schema, processor_count)
      assert_equal "append", task['mode']
-     assert_equal "
-     assert_equal nil, task['service_account_email']
-     assert_equal __FILE__, task['p12_keyfile']
+     assert_equal "application_default", task['auth_method']
      assert_equal nil, task['json_keyfile']
      assert_equal "your_project_name", task['project']
      assert_equal "your_dataset_name", task['dataset']
@@ -132,11 +129,6 @@ module Embulk
      config = least_config.merge('auth_method' => 'foobar')
      assert_raise { Bigquery.configure(config, schema, processor_count) }
 
-     config = least_config.merge('auth_method' => 'private_key').tap {|h| h.delete('p12_keyfile') }
-     assert_raise { Bigquery.configure(config, schema, processor_count) }
-     config = least_config.merge('auth_method' => 'private_key', 'p12_keyfile' => 'dummy')
-     assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
-
      config = least_config.merge('auth_method' => 'json_key').tap {|h| h.delete('json_keyfile') }
      assert_raise { Bigquery.configure(config, schema, processor_count) }
      config = least_config.merge('auth_method' => 'json_key', 'json_keyfile' => "#{EXAMPLE_ROOT}/json_key.json")
data/test/test_transaction.rb
CHANGED
@@ -8,12 +8,11 @@ module Embulk
  class TestTransaction < Test::Unit::TestCase
    def least_config
      DataSource.new({
-       'project'
-       'dataset'
-       'table'
-       '
-       '
-       'path_prefix' => 'tmp/', # randomly created is not good for our test
+       'project' => 'your_project_name',
+       'dataset' => 'your_dataset_name',
+       'table' => 'your_table_name',
+       'temp_table' => 'temp_table', # randomly created is not good for our test
+       'path_prefix' => 'tmp/', # randomly created is not good for our test
      })
    end
 
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.5.0
+  version: 0.6.0
 platform: ruby
 authors:
 - Satoshi Akama
@@ -128,6 +128,7 @@ files:
 - example/schema.json
 - example/schema_expose_errors.json
 - lib/embulk/output/bigquery.rb
+- lib/embulk/output/bigquery/auth.rb
 - lib/embulk/output/bigquery/bigquery_client.rb
 - lib/embulk/output/bigquery/file_writer.rb
 - lib/embulk/output/bigquery/gcs_client.rb
|