embulk-output-bigquery 0.5.0 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +29 -4
- data/README.md +71 -42
- data/embulk-output-bigquery.gemspec +10 -3
- data/lib/embulk/output/bigquery.rb +11 -20
- data/lib/embulk/output/bigquery/auth.rb +35 -0
- data/lib/embulk/output/bigquery/google_client.rb +3 -34
- data/lib/embulk/output/bigquery/value_converter_factory.rb +31 -0
- data/test/test_bigquery_client.rb +1 -5
- data/test/test_configure.rb +4 -12
- data/test/test_helper.rb +7 -1
- data/test/test_transaction.rb +5 -6
- data/test/test_value_converter_factory.rb +86 -0
- metadata +29 -51
- data/example/config_append_direct_schema_update_options.yml +0 -31
- data/example/config_client_options.yml +0 -33
- data/example/config_csv.yml +0 -30
- data/example/config_delete_in_advance.yml +0 -29
- data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
- data/example/config_delete_in_advance_partitioned_table.yml +0 -33
- data/example/config_expose_errors.yml +0 -30
- data/example/config_gcs.yml +0 -32
- data/example/config_guess_from_embulk_schema.yml +0 -29
- data/example/config_guess_with_column_options.yml +0 -40
- data/example/config_gzip.yml +0 -1
- data/example/config_jsonl.yml +0 -1
- data/example/config_max_threads.yml +0 -34
- data/example/config_min_ouput_tasks.yml +0 -34
- data/example/config_mode_append.yml +0 -30
- data/example/config_mode_append_direct.yml +0 -30
- data/example/config_nested_record.yml +0 -1
- data/example/config_payload_column.yml +0 -20
- data/example/config_payload_column_index.yml +0 -20
- data/example/config_progress_log_interval.yml +0 -31
- data/example/config_replace.yml +0 -30
- data/example/config_replace_backup.yml +0 -32
- data/example/config_replace_backup_field_partitioned_table.yml +0 -34
- data/example/config_replace_backup_partitioned_table.yml +0 -34
- data/example/config_replace_field_partitioned_table.yml +0 -33
- data/example/config_replace_partitioned_table.yml +0 -33
- data/example/config_replace_schema_update_options.yml +0 -33
- data/example/config_skip_file_generation.yml +0 -32
- data/example/config_table_strftime.yml +0 -30
- data/example/config_template_table.yml +0 -21
- data/example/config_uncompressed.yml +0 -1
- data/example/config_with_rehearsal.yml +0 -33
- data/example/example.csv +0 -17
- data/example/example.yml +0 -1
- data/example/example2_1.csv +0 -1
- data/example/example2_2.csv +0 -1
- data/example/example4_1.csv +0 -1
- data/example/example4_2.csv +0 -1
- data/example/example4_3.csv +0 -1
- data/example/example4_4.csv +0 -1
- data/example/json_key.json +0 -12
- data/example/nested_example.jsonl +0 -16
- data/example/schema.json +0 -30
- data/example/schema_expose_errors.json +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7ea0cf04e91a092e3d97bf3e46d1c181aab4943f
|
4
|
+
data.tar.gz: 60e970acbc16128189df8c274a832d23160e4c80
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14fb288ad9781515a28cf72869ca7e76081202b8cf7e29d2448bb9ec37e6a8e8e73a046930f53c41785551b354a990ccc8ee2f9298b418de672c6dfaa2e6447b
|
7
|
+
data.tar.gz: 9f8f59c89cf7cc9974ab8a287ffbf522263ff5e81ff988de95f22c9086cf805d39d8f9c7c7d41310357464a29e5ec0404fe819b5ba41de36eb0d870c4ca31144
|
data/CHANGELOG.md
CHANGED
@@ -1,9 +1,34 @@
|
|
1
|
+
## 0.6.4 - 2019-11-06
|
2
|
+
|
3
|
+
* [enhancement] Add DATETIME type converter (thanks to @kekekenta)
|
4
|
+
|
5
|
+
## 0.6.3 - 2019-10-28
|
6
|
+
|
7
|
+
* [enhancement] Add DATE type converter (thanks to @tksfjt1024)
|
8
|
+
|
9
|
+
## 0.6.2 - 2019-10-16
|
10
|
+
|
11
|
+
* [maintenance] Lock signet and google-api-client version (thanks to @hiroyuki-sato)
|
12
|
+
|
13
|
+
## 0.6.1 - 2019-08-28
|
14
|
+
|
15
|
+
* [maintenance] Release a new gem not to include symlinks to make it work on Windows.
|
16
|
+
|
17
|
+
## 0.6.0 - 2019-08-11
|
18
|
+
|
19
|
+
Cleanup `auth_method`:
|
20
|
+
|
21
|
+
* [enhancement] Support `auth_method: authorized_user` (OAuth)
|
22
|
+
* [incompatibility change] Rename `auth_method: json_key` to `auth_method: service_account` (`json_key` is kept for backward compatibility)
|
23
|
+
* [incompatibility change] Remove deprecated `auth_method: private_key` (p12 key)
|
24
|
+
* [incompatibility change] Change the default `auth_method` to `application_default` from `private_key` because `private_key` was dropped.
|
25
|
+
|
1
26
|
## 0.5.0 - 2019-08-10
|
2
27
|
|
3
|
-
* [incompatibility change] Drop deprecated
|
4
|
-
* [incompatibility change] Drop
|
5
|
-
* [incompatibility change]
|
6
|
-
|
28
|
+
* [incompatibility change] Drop deprecated `time_partitioning`.`require_partition_filter`
|
29
|
+
* [incompatibility change] Drop `prevent_duplicate_insert` which has no use-case now
|
30
|
+
* [incompatibility change] Modes `replace`, `replace_backup`, `append`, and `delete_in_advance` require `auto_create_table: true` now because, previously, these modes had created a target table even with `auto_create_table: false`, which confused users. Note that `auto_create_table: true` is always required even for a partition (a table name with a partition decorator) which may not require creating a table. This is for simplicity of logics and implementations.
|
31
|
+
* [incompatibility change] Change default value of `auto_create_table` to `true` because the above 4 modes, that is, except `append_direct` always require `auto_create_table: true` now.
|
7
32
|
|
8
33
|
## 0.4.14 - 2019-08-10
|
9
34
|
|
data/README.md
CHANGED
@@ -29,17 +29,15 @@ OAuth flow for installed applications.
|
|
29
29
|
|
30
30
|
| name | type | required? | default | description |
|
31
31
|
|:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
|
32
|
-
| mode | string | optional | "append" | See [Mode](#mode)
|
33
|
-
| auth_method | string | optional | "
|
34
|
-
|
|
35
|
-
|
|
36
|
-
| json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
|
37
|
-
| project | string | required if json_keyfile is not given | | project_id |
|
32
|
+
| mode | string | optional | "append" | See [Mode](#mode) |
|
33
|
+
| auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
|
34
|
+
| json_keyfile | string | optional | | keyfile path or `content` |
|
35
|
+
| project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
|
38
36
|
| dataset | string | required | | dataset |
|
39
37
|
| location | string | optional | nil | geographic location of dataset. See [Location](#location) |
|
40
38
|
| table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
|
41
39
|
| auto_create_dataset | boolean | optional | false | automatically create dataset |
|
42
|
-
| auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes
|
40
|
+
| auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes require `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
|
43
41
|
| schema_file | string | optional | | /path/to/schema.json |
|
44
42
|
| template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
|
45
43
|
| job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
|
@@ -108,9 +106,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
|
|
108
106
|
out:
|
109
107
|
type: bigquery
|
110
108
|
mode: append
|
111
|
-
auth_method:
|
112
|
-
|
113
|
-
p12_keyfile: /path/to/p12_keyfile.p12
|
109
|
+
auth_method: service_account
|
110
|
+
json_keyfile: /path/to/json_keyfile.json
|
114
111
|
project: your-project-000
|
115
112
|
dataset: your_dataset_name
|
116
113
|
table: your_table_name
|
@@ -118,7 +115,7 @@ out:
|
|
118
115
|
source_format: NEWLINE_DELIMITED_JSON
|
119
116
|
```
|
120
117
|
|
121
|
-
###
|
118
|
+
### Location
|
122
119
|
|
123
120
|
The geographic location of the dataset. Required except for US and EU.
|
124
121
|
|
@@ -126,7 +123,7 @@ GCS bucket should be in same region when you use `gcs_bucket`.
|
|
126
123
|
|
127
124
|
See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
|
128
125
|
|
129
|
-
###
|
126
|
+
### Mode
|
130
127
|
|
131
128
|
5 modes are provided.
|
132
129
|
|
@@ -165,53 +162,69 @@ NOTE: BigQuery does not support replacing (actually, copying into) a non-partiti
|
|
165
162
|
|
166
163
|
### Authentication
|
167
164
|
|
168
|
-
There are
|
165
|
+
There are four authentication methods
|
166
|
+
|
167
|
+
1. `service_account` (or `json_key` for backward compatibility)
|
168
|
+
1. `authorized_user`
|
169
|
+
1. `compute_engine`
|
170
|
+
1. `application_default`
|
169
171
|
|
170
|
-
|
171
|
-
2. JSON key of GCP(Google Cloud Platform)'s service account
|
172
|
-
3. Pre-defined access token (Google Compute Engine only)
|
172
|
+
#### service\_account (or json\_key)
|
173
173
|
|
174
|
-
|
174
|
+
Use GCP service account credentials.
|
175
|
+
You first need to create a service account, download its json key and deploy the key with embulk.
|
176
|
+
|
177
|
+
```yaml
|
178
|
+
out:
|
179
|
+
type: bigquery
|
180
|
+
auth_method: service_account
|
181
|
+
json_keyfile: /path/to/json_keyfile.json
|
182
|
+
```
|
175
183
|
|
176
|
-
You
|
177
|
-
download its private key and deploy the key with embulk.
|
184
|
+
You can also embed contents of `json_keyfile` at config.yml.
|
178
185
|
|
179
186
|
```yaml
|
180
187
|
out:
|
181
188
|
type: bigquery
|
182
|
-
auth_method:
|
183
|
-
|
184
|
-
|
189
|
+
auth_method: service_account
|
190
|
+
json_keyfile:
|
191
|
+
content: |
|
192
|
+
{
|
193
|
+
"private_key_id": "123456789",
|
194
|
+
"private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
|
195
|
+
"client_email": "..."
|
196
|
+
}
|
185
197
|
```
|
186
198
|
|
187
|
-
####
|
199
|
+
#### authorized\_user
|
188
200
|
|
189
|
-
|
190
|
-
|
201
|
+
Use Google user credentials.
|
202
|
+
You can get your credentials at `~/.config/gcloud/application_default_credentials.json` by running `gcloud auth login`.
|
191
203
|
|
192
204
|
```yaml
|
193
205
|
out:
|
194
206
|
type: bigquery
|
195
|
-
auth_method:
|
196
|
-
json_keyfile: /path/to/
|
207
|
+
auth_method: authorized_user
|
208
|
+
json_keyfile: /path/to/credentials.json
|
197
209
|
```
|
198
210
|
|
199
|
-
You can also embed contents of json_keyfile at config.yml.
|
211
|
+
You can also embed contents of `json_keyfile` at config.yml.
|
200
212
|
|
201
213
|
```yaml
|
202
214
|
out:
|
203
215
|
type: bigquery
|
204
|
-
auth_method:
|
216
|
+
auth_method: authorized_user
|
205
217
|
json_keyfile:
|
206
218
|
content: |
|
207
219
|
{
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
220
|
+
"client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
|
221
|
+
"client_secret":"xxxxxxxxxxx",
|
222
|
+
"refresh_token":"xxxxxxxxxxx",
|
223
|
+
"type":"authorized_user"
|
224
|
+
}
|
212
225
|
```
|
213
226
|
|
214
|
-
####
|
227
|
+
#### compute\_engine
|
215
228
|
|
216
229
|
On the other hand, you don't need to explicitly create a service account for embulk when you
|
217
230
|
run embulk in Google Compute Engine. In this third authentication method, you need to
|
@@ -224,6 +237,22 @@ out:
|
|
224
237
|
auth_method: compute_engine
|
225
238
|
```
|
226
239
|
|
240
|
+
#### application\_default
|
241
|
+
|
242
|
+
Use Application Default Credentials (ADC). ADC is a strategy to locate Google Cloud Service Account credentials.
|
243
|
+
|
244
|
+
1. ADC checks to see if the environment variable `GOOGLE_APPLICATION_CREDENTIALS` is set. If the variable is set, ADC uses the service account file that the variable points to.
|
245
|
+
2. ADC checks to see if `~/.config/gcloud/application_default_credentials.json` is located. This file is created by running `gcloud auth application-default login`.
|
246
|
+
3. Use the default service account for credentials if the application running on Compute Engine, App Engine, Kubernetes Engine, Cloud Functions or Cloud Run.
|
247
|
+
|
248
|
+
See https://cloud.google.com/docs/authentication/production for details.
|
249
|
+
|
250
|
+
```yaml
|
251
|
+
out:
|
252
|
+
type: bigquery
|
253
|
+
auth_method: application_default
|
254
|
+
```
|
255
|
+
|
227
256
|
### Table id formatting
|
228
257
|
|
229
258
|
`table` and option accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
|
@@ -232,12 +261,12 @@ Table ids are formatted at runtime
|
|
232
261
|
using the local time of the embulk server.
|
233
262
|
|
234
263
|
For example, with the configuration below,
|
235
|
-
data is inserted into tables `
|
264
|
+
data is inserted into tables `table_20150503`, `table_20150504` and so on.
|
236
265
|
|
237
266
|
```yaml
|
238
267
|
out:
|
239
268
|
type: bigquery
|
240
|
-
table: table_%
|
269
|
+
table: table_%Y%m%d
|
241
270
|
```
|
242
271
|
|
243
272
|
### Dynamic table creating
|
@@ -252,7 +281,7 @@ Please set file path of schema.json.
|
|
252
281
|
out:
|
253
282
|
type: bigquery
|
254
283
|
auto_create_table: true
|
255
|
-
table: table_%
|
284
|
+
table: table_%Y%m%d
|
256
285
|
schema_file: /path/to/schema.json
|
257
286
|
```
|
258
287
|
|
@@ -264,7 +293,7 @@ Plugin will try to read schema from existing table and use it as schema template
|
|
264
293
|
out:
|
265
294
|
type: bigquery
|
266
295
|
auto_create_table: true
|
267
|
-
table: table_%
|
296
|
+
table: table_%Y%m%d
|
268
297
|
template_table: existing_table_name
|
269
298
|
```
|
270
299
|
|
@@ -278,17 +307,17 @@ Column options are used to aid guessing BigQuery schema, or to define conversion
|
|
278
307
|
|
279
308
|
- **column_options**: advanced: an array of options for columns
|
280
309
|
- **name**: column name
|
281
|
-
- **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, and `RECORD`. See belows for supported conversion type.
|
310
|
+
- **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, and `RECORD`. See below for supported conversion types.
|
282
311
|
- boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
|
283
312
|
- long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
|
284
313
|
- double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
|
285
|
-
- string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `RECORD` (default: `STRING`)
|
286
|
-
- timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `TIMESTAMP`)
|
314
|
+
- string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, `RECORD` (default: `STRING`)
|
315
|
+
- timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE` (default: `TIMESTAMP`)
|
287
316
|
- json: `STRING`, `RECORD` (default: `STRING`)
|
288
317
|
- **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
|
289
318
|
- **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
|
290
319
|
- **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
|
291
|
-
- **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`).
|
320
|
+
- **timezone**: timezone to convert into/from `timestamp`, `date` (string, default is `default_timezone`).
|
292
321
|
- **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
|
293
322
|
- **default_timezone**: default timezone for column_options (string, default is "UTC")
|
294
323
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-bigquery"
|
3
|
-
spec.version = "0.
|
3
|
+
spec.version = "0.6.4"
|
4
4
|
spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
|
5
5
|
spec.summary = "Google BigQuery output plugin for Embulk"
|
6
6
|
spec.description = "Embulk plugin that insert records to Google BigQuery."
|
@@ -8,11 +8,18 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.licenses = ["MIT"]
|
9
9
|
spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
|
10
10
|
|
11
|
-
|
11
|
+
# Exclude example directory which uses symlinks from generating gem.
|
12
|
+
# Symlinks do not work properly on the Windows platform without administrator privilege.
|
13
|
+
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] - Dir["example/*" ]
|
12
14
|
spec.test_files = spec.files.grep(%r{^(test|spec)/})
|
13
15
|
spec.require_paths = ["lib"]
|
14
16
|
|
15
|
-
|
17
|
+
# TODO
|
18
|
+
# signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
|
19
|
+
# Embulk 0.9 uses JRuby 9.1.X.Y, which is compatible with Ruby 2.3.
|
20
|
+
# So, force installing signet < 0.12 and google-api-client < 0.33.0
|
21
|
+
spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
|
22
|
+
spec.add_dependency 'google-api-client','< 0.33.0'
|
16
23
|
spec.add_dependency 'time_with_zone'
|
17
24
|
|
18
25
|
spec.add_development_dependency 'bundler', ['>= 1.10.6']
|
@@ -23,7 +23,7 @@ module Embulk
|
|
23
23
|
# @return JSON string
|
24
24
|
def self.load(v)
|
25
25
|
if v.is_a?(String) # path
|
26
|
-
File.read(v)
|
26
|
+
File.read(File.expand_path(v))
|
27
27
|
elsif v.is_a?(Hash)
|
28
28
|
v['content']
|
29
29
|
end
|
@@ -33,9 +33,7 @@ module Embulk
|
|
33
33
|
def self.configure(config, schema, task_count)
|
34
34
|
task = {
|
35
35
|
'mode' => config.param('mode', :string, :default => 'append'),
|
36
|
-
'auth_method' => config.param('auth_method', :string, :default => '
|
37
|
-
'service_account_email' => config.param('service_account_email', :string, :default => nil),
|
38
|
-
'p12_keyfile' => config.param('p12_keyfile', :string, :default => nil),
|
36
|
+
'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
|
39
37
|
'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
|
40
38
|
'project' => config.param('project', :string, :default => nil),
|
41
39
|
'dataset' => config.param('dataset', :string),
|
@@ -125,28 +123,21 @@ module Embulk
|
|
125
123
|
end
|
126
124
|
|
127
125
|
task['auth_method'] = task['auth_method'].downcase
|
128
|
-
unless %w[
|
129
|
-
raise ConfigError.new "`auth_method` must be one of
|
126
|
+
unless %w[json_key service_account authorized_user compute_engine application_default].include?(task['auth_method'])
|
127
|
+
raise ConfigError.new "`auth_method` must be one of service_account (or json_key), authorized_user, compute_engine, application_default"
|
130
128
|
end
|
131
|
-
if task['auth_method'] == '
|
132
|
-
raise ConfigError.new "`
|
133
|
-
end
|
134
|
-
if task['auth_method'] == 'json_key' and task['json_keyfile'].nil?
|
135
|
-
raise ConfigError.new "`json_keyfile` is required for auth_method json_key"
|
129
|
+
if (task['auth_method'] == 'service_account' or task['auth_method'] == 'json_key') and task['json_keyfile'].nil?
|
130
|
+
raise ConfigError.new "`json_keyfile` is required for auth_method: service_account (or json_key)"
|
136
131
|
end
|
137
132
|
|
138
|
-
jsonkey_params = nil
|
139
133
|
if task['json_keyfile']
|
140
134
|
begin
|
141
|
-
|
135
|
+
json_key = JSON.parse(task['json_keyfile'])
|
136
|
+
task['project'] ||= json_key['project_id']
|
142
137
|
rescue => e
|
143
138
|
raise ConfigError.new "json_keyfile is not a JSON file"
|
144
139
|
end
|
145
140
|
end
|
146
|
-
|
147
|
-
if jsonkey_params
|
148
|
-
task['project'] ||= jsonkey_params['project_id']
|
149
|
-
end
|
150
141
|
if task['project'].nil?
|
151
142
|
raise ConfigError.new "Required field \"project\" is not set"
|
152
143
|
end
|
@@ -313,14 +304,14 @@ module Embulk
|
|
313
304
|
bigquery.create_table_if_not_exists(task['table'])
|
314
305
|
when 'replace'
|
315
306
|
bigquery.create_table_if_not_exists(task['temp_table'])
|
316
|
-
bigquery.create_table_if_not_exists(task['table'])
|
307
|
+
bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
|
317
308
|
when 'append'
|
318
309
|
bigquery.create_table_if_not_exists(task['temp_table'])
|
319
|
-
bigquery.create_table_if_not_exists(task['table'])
|
310
|
+
bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
|
320
311
|
when 'replace_backup'
|
321
312
|
bigquery.create_table_if_not_exists(task['temp_table'])
|
322
313
|
bigquery.create_table_if_not_exists(task['table'])
|
323
|
-
bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old'])
|
314
|
+
bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old']) # needs for when a partition
|
324
315
|
else # append_direct
|
325
316
|
if task['auto_create_table']
|
326
317
|
bigquery.create_table_if_not_exists(task['table'])
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'googleauth'
|
2
|
+
|
3
|
+
module Embulk
|
4
|
+
module Output
|
5
|
+
class Bigquery < OutputPlugin
|
6
|
+
class Auth
|
7
|
+
|
8
|
+
attr_reader :auth_method, :json_key, :scope
|
9
|
+
|
10
|
+
def initialize(task, scope)
|
11
|
+
@auth_method = task['auth_method']
|
12
|
+
@json_key = task['json_keyfile']
|
13
|
+
@scope = scope
|
14
|
+
end
|
15
|
+
|
16
|
+
def authenticate
|
17
|
+
case auth_method
|
18
|
+
when 'authorized_user'
|
19
|
+
key = StringIO.new(json_key)
|
20
|
+
return Google::Auth::UserRefreshCredentials.make_creds(json_key_io: key, scope: scope)
|
21
|
+
when 'compute_engine'
|
22
|
+
return Google::Auth::GCECredentials.new
|
23
|
+
when 'service_account', 'json_key' # json_key is for backward compatibility
|
24
|
+
key = StringIO.new(json_key)
|
25
|
+
return Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
|
26
|
+
when 'application_default'
|
27
|
+
return Google::Auth.get_application_default([scope])
|
28
|
+
else
|
29
|
+
raise ConfigError.new("Unknown auth method: #{auth_method}")
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require_relative 'auth'
|
2
2
|
|
3
3
|
module Embulk
|
4
4
|
module Output
|
@@ -14,6 +14,7 @@ module Embulk
|
|
14
14
|
def initialize(task, scope, client_class)
|
15
15
|
@task = task
|
16
16
|
@scope = scope
|
17
|
+
@auth = Auth.new(task, scope)
|
17
18
|
@client_class = client_class
|
18
19
|
end
|
19
20
|
|
@@ -37,39 +38,7 @@ module Embulk
|
|
37
38
|
Embulk.logger.debug { "embulk-output-bigquery: client_options: #{client.client_options.to_h}" }
|
38
39
|
Embulk.logger.debug { "embulk-output-bigquery: request_options: #{client.request_options.to_h}" }
|
39
40
|
|
40
|
-
|
41
|
-
when 'private_key'
|
42
|
-
private_key_passphrase = 'notasecret'
|
43
|
-
key = Google::APIClient::KeyUtils.load_from_pkcs12(@task['p12_keyfile'], private_key_passphrase)
|
44
|
-
auth = Signet::OAuth2::Client.new(
|
45
|
-
token_credential_uri: "https://accounts.google.com/o/oauth2/token",
|
46
|
-
audience: "https://accounts.google.com/o/oauth2/token",
|
47
|
-
scope: @scope,
|
48
|
-
issuer: @task['service_account_email'],
|
49
|
-
signing_key: key)
|
50
|
-
|
51
|
-
when 'compute_engine'
|
52
|
-
auth = Google::Auth::GCECredentials.new
|
53
|
-
|
54
|
-
when 'json_key'
|
55
|
-
json_key = @task['json_keyfile']
|
56
|
-
if File.exist?(json_key)
|
57
|
-
auth = File.open(json_key) do |f|
|
58
|
-
Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: @scope)
|
59
|
-
end
|
60
|
-
else
|
61
|
-
key = StringIO.new(json_key)
|
62
|
-
auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: @scope)
|
63
|
-
end
|
64
|
-
|
65
|
-
when 'application_default'
|
66
|
-
auth = Google::Auth.get_application_default([@scope])
|
67
|
-
|
68
|
-
else
|
69
|
-
raise ConfigError, "Unknown auth method: #{@task['auth_method']}"
|
70
|
-
end
|
71
|
-
|
72
|
-
client.authorization = auth
|
41
|
+
client.authorization = @auth.authenticate
|
73
42
|
|
74
43
|
@cached_client_expiration = Time.now + 1800
|
75
44
|
@cached_client = client
|