embulk-output-bigquery 0.5.0 → 0.6.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +29 -4
- data/README.md +71 -42
- data/embulk-output-bigquery.gemspec +10 -3
- data/lib/embulk/output/bigquery.rb +11 -20
- data/lib/embulk/output/bigquery/auth.rb +35 -0
- data/lib/embulk/output/bigquery/google_client.rb +3 -34
- data/lib/embulk/output/bigquery/value_converter_factory.rb +31 -0
- data/test/test_bigquery_client.rb +1 -5
- data/test/test_configure.rb +4 -12
- data/test/test_helper.rb +7 -1
- data/test/test_transaction.rb +5 -6
- data/test/test_value_converter_factory.rb +86 -0
- metadata +29 -51
- data/example/config_append_direct_schema_update_options.yml +0 -31
- data/example/config_client_options.yml +0 -33
- data/example/config_csv.yml +0 -30
- data/example/config_delete_in_advance.yml +0 -29
- data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
- data/example/config_delete_in_advance_partitioned_table.yml +0 -33
- data/example/config_expose_errors.yml +0 -30
- data/example/config_gcs.yml +0 -32
- data/example/config_guess_from_embulk_schema.yml +0 -29
- data/example/config_guess_with_column_options.yml +0 -40
- data/example/config_gzip.yml +0 -1
- data/example/config_jsonl.yml +0 -1
- data/example/config_max_threads.yml +0 -34
- data/example/config_min_ouput_tasks.yml +0 -34
- data/example/config_mode_append.yml +0 -30
- data/example/config_mode_append_direct.yml +0 -30
- data/example/config_nested_record.yml +0 -1
- data/example/config_payload_column.yml +0 -20
- data/example/config_payload_column_index.yml +0 -20
- data/example/config_progress_log_interval.yml +0 -31
- data/example/config_replace.yml +0 -30
- data/example/config_replace_backup.yml +0 -32
- data/example/config_replace_backup_field_partitioned_table.yml +0 -34
- data/example/config_replace_backup_partitioned_table.yml +0 -34
- data/example/config_replace_field_partitioned_table.yml +0 -33
- data/example/config_replace_partitioned_table.yml +0 -33
- data/example/config_replace_schema_update_options.yml +0 -33
- data/example/config_skip_file_generation.yml +0 -32
- data/example/config_table_strftime.yml +0 -30
- data/example/config_template_table.yml +0 -21
- data/example/config_uncompressed.yml +0 -1
- data/example/config_with_rehearsal.yml +0 -33
- data/example/example.csv +0 -17
- data/example/example.yml +0 -1
- data/example/example2_1.csv +0 -1
- data/example/example2_2.csv +0 -1
- data/example/example4_1.csv +0 -1
- data/example/example4_2.csv +0 -1
- data/example/example4_3.csv +0 -1
- data/example/example4_4.csv +0 -1
- data/example/json_key.json +0 -12
- data/example/nested_example.jsonl +0 -16
- data/example/schema.json +0 -30
- data/example/schema_expose_errors.json +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7ea0cf04e91a092e3d97bf3e46d1c181aab4943f
|
4
|
+
data.tar.gz: 60e970acbc16128189df8c274a832d23160e4c80
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14fb288ad9781515a28cf72869ca7e76081202b8cf7e29d2448bb9ec37e6a8e8e73a046930f53c41785551b354a990ccc8ee2f9298b418de672c6dfaa2e6447b
|
7
|
+
data.tar.gz: 9f8f59c89cf7cc9974ab8a287ffbf522263ff5e81ff988de95f22c9086cf805d39d8f9c7c7d41310357464a29e5ec0404fe819b5ba41de36eb0d870c4ca31144
|
data/CHANGELOG.md
CHANGED
@@ -1,9 +1,34 @@
|
|
1
|
+
## 0.6.4 - 2019-11-06
|
2
|
+
|
3
|
+
* [enhancement] Add DATETIME type converter (thanks to @kekekenta)
|
4
|
+
|
5
|
+
## 0.6.3 - 2019-10-28
|
6
|
+
|
7
|
+
* [enhancement] Add DATE type converter (thanks to @tksfjt1024)
|
8
|
+
|
9
|
+
## 0.6.2 - 2019-10-16
|
10
|
+
|
11
|
+
* [maintenance] Lock signet and google-api-client version (thanks to @hiroyuki-sato)
|
12
|
+
|
13
|
+
## 0.6.1 - 2019-08-28
|
14
|
+
|
15
|
+
* [maintenance] Release a new gem not to include symlinks to make it work on Windows.
|
16
|
+
|
17
|
+
## 0.6.0 - 2019-08-11
|
18
|
+
|
19
|
+
Cleanup `auth_method`:
|
20
|
+
|
21
|
+
* [enhancement] Support `auth_method: authorized_user` (OAuth)
|
22
|
+
* [incompatibility change] Rename `auth_method: json_key` to `auth_method: service_account` (`json_key` is kept for backward compatibility)
|
23
|
+
* [incompatibility change] Remove deprecated `auth_method: private_key` (p12 key)
|
24
|
+
* [incompatibility change] Change the default `auth_method` to `application_default` from `private_key` because `private_key` was dropped.
|
25
|
+
|
1
26
|
## 0.5.0 - 2019-08-10
|
2
27
|
|
3
|
-
* [incompatibility change] Drop deprecated
|
4
|
-
* [incompatibility change] Drop
|
5
|
-
* [incompatibility change]
|
6
|
-
|
28
|
+
* [incompatibility change] Drop deprecated `time_partitioning`.`require_partition_filter`
|
29
|
+
* [incompatibility change] Drop `prevent_duplicate_insert` which has no use-case now
|
30
|
+
* [incompatibility change] Modes `replace`, `replace_backup`, `append`, and `delete_in_advance` require `auto_create_table: true` now because, previously, these modes had created a target table even with `auto_create_table: false` and made users being confused. Note that `auto_create_table: true` is always required even for a partition (a table name with a partition decorator) which may not require creating a table. This is for simplicity of logics and implementations.
|
31
|
+
* [incompatibility change] Change default value of `auto_create_table` to `true` because the above 4 modes, that is, except `append_direct` always require `auto_create_table: true` now.
|
7
32
|
|
8
33
|
## 0.4.14 - 2019-08-10
|
9
34
|
|
data/README.md
CHANGED
@@ -29,17 +29,15 @@ OAuth flow for installed applications.
|
|
29
29
|
|
30
30
|
| name | type | required? | default | description |
|
31
31
|
|:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
|
32
|
-
| mode | string | optional | "append" | See [Mode](#mode)
|
33
|
-
| auth_method | string | optional | "
|
34
|
-
|
|
35
|
-
|
|
36
|
-
| json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
|
37
|
-
| project | string | required if json_keyfile is not given | | project_id |
|
32
|
+
| mode | string | optional | "append" | See [Mode](#mode) |
|
33
|
+
| auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
|
34
|
+
| json_keyfile | string | optional | | keyfile path or `content` |
|
35
|
+
| project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
|
38
36
|
| dataset | string | required | | dataset |
|
39
37
|
| location | string | optional | nil | geographic location of dataset. See [Location](#location) |
|
40
38
|
| table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
|
41
39
|
| auto_create_dataset | boolean | optional | false | automatically create dataset |
|
42
|
-
| auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes
|
40
|
+
| auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes require `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
|
43
41
|
| schema_file | string | optional | | /path/to/schema.json |
|
44
42
|
| template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
|
45
43
|
| job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
|
@@ -108,9 +106,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
|
|
108
106
|
out:
|
109
107
|
type: bigquery
|
110
108
|
mode: append
|
111
|
-
auth_method:
|
112
|
-
|
113
|
-
p12_keyfile: /path/to/p12_keyfile.p12
|
109
|
+
auth_method: service_account
|
110
|
+
json_keyfile: /path/to/json_keyfile.json
|
114
111
|
project: your-project-000
|
115
112
|
dataset: your_dataset_name
|
116
113
|
table: your_table_name
|
@@ -118,7 +115,7 @@ out:
|
|
118
115
|
source_format: NEWLINE_DELIMITED_JSON
|
119
116
|
```
|
120
117
|
|
121
|
-
###
|
118
|
+
### Location
|
122
119
|
|
123
120
|
The geographic location of the dataset. Required except for US and EU.
|
124
121
|
|
@@ -126,7 +123,7 @@ GCS bucket should be in same region when you use `gcs_bucket`.
|
|
126
123
|
|
127
124
|
See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
|
128
125
|
|
129
|
-
###
|
126
|
+
### Mode
|
130
127
|
|
131
128
|
5 modes are provided.
|
132
129
|
|
@@ -165,53 +162,69 @@ NOTE: BigQuery does not support replacing (actually, copying into) a non-partiti
|
|
165
162
|
|
166
163
|
### Authentication
|
167
164
|
|
168
|
-
There are
|
165
|
+
There are four authentication methods
|
166
|
+
|
167
|
+
1. `service_account` (or `json_key` for backward compatibility)
|
168
|
+
1. `authorized_user`
|
169
|
+
1. `compute_engine`
|
170
|
+
1. `application_default`
|
169
171
|
|
170
|
-
|
171
|
-
2. JSON key of GCP(Google Cloud Platform)'s service account
|
172
|
-
3. Pre-defined access token (Google Compute Engine only)
|
172
|
+
#### service\_account (or json\_key)
|
173
173
|
|
174
|
-
|
174
|
+
Use GCP service account credentials.
|
175
|
+
You first need to create a service account, download its json key and deploy the key with embulk.
|
176
|
+
|
177
|
+
```yaml
|
178
|
+
out:
|
179
|
+
type: bigquery
|
180
|
+
auth_method: service_account
|
181
|
+
json_keyfile: /path/to/json_keyfile.json
|
182
|
+
```
|
175
183
|
|
176
|
-
You
|
177
|
-
download its private key and deploy the key with embulk.
|
184
|
+
You can also embed contents of `json_keyfile` at config.yml.
|
178
185
|
|
179
186
|
```yaml
|
180
187
|
out:
|
181
188
|
type: bigquery
|
182
|
-
auth_method:
|
183
|
-
|
184
|
-
|
189
|
+
auth_method: service_account
|
190
|
+
json_keyfile:
|
191
|
+
content: |
|
192
|
+
{
|
193
|
+
"private_key_id": "123456789",
|
194
|
+
"private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
|
195
|
+
"client_email": "..."
|
196
|
+
}
|
185
197
|
```
|
186
198
|
|
187
|
-
####
|
199
|
+
#### authorized\_user
|
188
200
|
|
189
|
-
|
190
|
-
|
201
|
+
Use Google user credentials.
|
202
|
+
You can get your credentials at `~/.config/gcloud/application_default_credentials.json` by running `gcloud auth login`.
|
191
203
|
|
192
204
|
```yaml
|
193
205
|
out:
|
194
206
|
type: bigquery
|
195
|
-
auth_method:
|
196
|
-
json_keyfile: /path/to/
|
207
|
+
auth_method: authorized_user
|
208
|
+
json_keyfile: /path/to/credentials.json
|
197
209
|
```
|
198
210
|
|
199
|
-
You can also embed contents of json_keyfile at config.yml.
|
211
|
+
You can also embed contents of `json_keyfile` at config.yml.
|
200
212
|
|
201
213
|
```yaml
|
202
214
|
out:
|
203
215
|
type: bigquery
|
204
|
-
auth_method:
|
216
|
+
auth_method: authorized_user
|
205
217
|
json_keyfile:
|
206
218
|
content: |
|
207
219
|
{
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
220
|
+
"client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
|
221
|
+
"client_secret":"xxxxxxxxxxx",
|
222
|
+
"refresh_token":"xxxxxxxxxxx",
|
223
|
+
"type":"authorized_user"
|
224
|
+
}
|
212
225
|
```
|
213
226
|
|
214
|
-
####
|
227
|
+
#### compute\_engine
|
215
228
|
|
216
229
|
On the other hand, you don't need to explicitly create a service account for embulk when you
|
217
230
|
run embulk in Google Compute Engine. In this third authentication method, you need to
|
@@ -224,6 +237,22 @@ out:
|
|
224
237
|
auth_method: compute_engine
|
225
238
|
```
|
226
239
|
|
240
|
+
#### application\_default
|
241
|
+
|
242
|
+
Use Application Default Credentials (ADC). ADC is a strategy to locate Google Cloud Service Account credentials.
|
243
|
+
|
244
|
+
1. ADC checks to see if the environment variable `GOOGLE_APPLICATION_CREDENTIALS` is set. If the variable is set, ADC uses the service account file that the variable points to.
|
245
|
+
2. ADC checks to see if `~/.config/gcloud/application_default_credentials.json` is located. This file is created by running `gcloud auth application-default login`.
|
246
|
+
3. Use the default service account for credentials if the application is running on Compute Engine, App Engine, Kubernetes Engine, Cloud Functions or Cloud Run.
|
247
|
+
|
248
|
+
See https://cloud.google.com/docs/authentication/production for details.
|
249
|
+
|
250
|
+
```yaml
|
251
|
+
out:
|
252
|
+
type: bigquery
|
253
|
+
auth_method: application_default
|
254
|
+
```
|
255
|
+
|
227
256
|
### Table id formatting
|
228
257
|
|
229
258
|
`table` and option accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
|
@@ -232,12 +261,12 @@ Table ids are formatted at runtime
|
|
232
261
|
using the local time of the embulk server.
|
233
262
|
|
234
263
|
For example, with the configuration below,
|
235
|
-
data is inserted into tables `
|
264
|
+
data is inserted into tables `table_20150503`, `table_20150504` and so on.
|
236
265
|
|
237
266
|
```yaml
|
238
267
|
out:
|
239
268
|
type: bigquery
|
240
|
-
table: table_%
|
269
|
+
table: table_%Y%m%d
|
241
270
|
```
|
242
271
|
|
243
272
|
### Dynamic table creating
|
@@ -252,7 +281,7 @@ Please set file path of schema.json.
|
|
252
281
|
out:
|
253
282
|
type: bigquery
|
254
283
|
auto_create_table: true
|
255
|
-
table: table_%
|
284
|
+
table: table_%Y%m%d
|
256
285
|
schema_file: /path/to/schema.json
|
257
286
|
```
|
258
287
|
|
@@ -264,7 +293,7 @@ Plugin will try to read schema from existing table and use it as schema template
|
|
264
293
|
out:
|
265
294
|
type: bigquery
|
266
295
|
auto_create_table: true
|
267
|
-
table: table_%
|
296
|
+
table: table_%Y%m%d
|
268
297
|
template_table: existing_table_name
|
269
298
|
```
|
270
299
|
|
@@ -278,17 +307,17 @@ Column options are used to aid guessing BigQuery schema, or to define conversion
|
|
278
307
|
|
279
308
|
- **column_options**: advanced: an array of options for columns
|
280
309
|
- **name**: column name
|
281
|
-
- **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, and `RECORD`. See belows for supported conversion type.
|
310
|
+
- **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, and `RECORD`. See below for supported conversion types.
|
282
311
|
- boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
|
283
312
|
- long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
|
284
313
|
- double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
|
285
|
-
- string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `RECORD` (default: `STRING`)
|
286
|
-
- timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `TIMESTAMP`)
|
314
|
+
- string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, `RECORD` (default: `STRING`)
|
315
|
+
- timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE` (default: `TIMESTAMP`)
|
287
316
|
- json: `STRING`, `RECORD` (default: `STRING`)
|
288
317
|
- **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
|
289
318
|
- **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
|
290
319
|
- **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
|
291
|
-
- **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`).
|
320
|
+
- **timezone**: timezone to convert into/from `timestamp`, `date` (string, default is `default_timezone`).
|
292
321
|
- **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
|
293
322
|
- **default_timezone**: default timezone for column_options (string, default is "UTC")
|
294
323
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-bigquery"
|
3
|
-
spec.version = "0.
|
3
|
+
spec.version = "0.6.4"
|
4
4
|
spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
|
5
5
|
spec.summary = "Google BigQuery output plugin for Embulk"
|
6
6
|
spec.description = "Embulk plugin that insert records to Google BigQuery."
|
@@ -8,11 +8,18 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.licenses = ["MIT"]
|
9
9
|
spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
|
10
10
|
|
11
|
-
|
11
|
+
# Exclude example directory which uses symlinks from generating gem.
|
12
|
+
# Symlinks do not work properly on the Windows platform without administrator privilege.
|
13
|
+
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] - Dir["example/*" ]
|
12
14
|
spec.test_files = spec.files.grep(%r{^(test|spec)/})
|
13
15
|
spec.require_paths = ["lib"]
|
14
16
|
|
15
|
-
|
17
|
+
# TODO
|
18
|
+
# signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
|
19
|
+
# Embulk 0.9 uses JRuby 9.1.X.Y, which is compatible with Ruby 2.3.
|
20
|
+
# So, force installing signet < 0.12 and google-api-client < 0.33.0
|
21
|
+
spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
|
22
|
+
spec.add_dependency 'google-api-client','< 0.33.0'
|
16
23
|
spec.add_dependency 'time_with_zone'
|
17
24
|
|
18
25
|
spec.add_development_dependency 'bundler', ['>= 1.10.6']
|
@@ -23,7 +23,7 @@ module Embulk
|
|
23
23
|
# @return JSON string
|
24
24
|
def self.load(v)
|
25
25
|
if v.is_a?(String) # path
|
26
|
-
File.read(v)
|
26
|
+
File.read(File.expand_path(v))
|
27
27
|
elsif v.is_a?(Hash)
|
28
28
|
v['content']
|
29
29
|
end
|
@@ -33,9 +33,7 @@ module Embulk
|
|
33
33
|
def self.configure(config, schema, task_count)
|
34
34
|
task = {
|
35
35
|
'mode' => config.param('mode', :string, :default => 'append'),
|
36
|
-
'auth_method' => config.param('auth_method', :string, :default => '
|
37
|
-
'service_account_email' => config.param('service_account_email', :string, :default => nil),
|
38
|
-
'p12_keyfile' => config.param('p12_keyfile', :string, :default => nil),
|
36
|
+
'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
|
39
37
|
'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
|
40
38
|
'project' => config.param('project', :string, :default => nil),
|
41
39
|
'dataset' => config.param('dataset', :string),
|
@@ -125,28 +123,21 @@ module Embulk
|
|
125
123
|
end
|
126
124
|
|
127
125
|
task['auth_method'] = task['auth_method'].downcase
|
128
|
-
unless %w[
|
129
|
-
raise ConfigError.new "`auth_method` must be one of
|
126
|
+
unless %w[json_key service_account authorized_user compute_engine application_default].include?(task['auth_method'])
|
127
|
+
raise ConfigError.new "`auth_method` must be one of service_account (or json_key), authorized_user, compute_engine, application_default"
|
130
128
|
end
|
131
|
-
if task['auth_method'] == '
|
132
|
-
raise ConfigError.new "`
|
133
|
-
end
|
134
|
-
if task['auth_method'] == 'json_key' and task['json_keyfile'].nil?
|
135
|
-
raise ConfigError.new "`json_keyfile` is required for auth_method json_key"
|
129
|
+
if (task['auth_method'] == 'service_account' or task['auth_method'] == 'json_key') and task['json_keyfile'].nil?
|
130
|
+
raise ConfigError.new "`json_keyfile` is required for auth_method: service_account (or json_key)"
|
136
131
|
end
|
137
132
|
|
138
|
-
jsonkey_params = nil
|
139
133
|
if task['json_keyfile']
|
140
134
|
begin
|
141
|
-
|
135
|
+
json_key = JSON.parse(task['json_keyfile'])
|
136
|
+
task['project'] ||= json_key['project_id']
|
142
137
|
rescue => e
|
143
138
|
raise ConfigError.new "json_keyfile is not a JSON file"
|
144
139
|
end
|
145
140
|
end
|
146
|
-
|
147
|
-
if jsonkey_params
|
148
|
-
task['project'] ||= jsonkey_params['project_id']
|
149
|
-
end
|
150
141
|
if task['project'].nil?
|
151
142
|
raise ConfigError.new "Required field \"project\" is not set"
|
152
143
|
end
|
@@ -313,14 +304,14 @@ module Embulk
|
|
313
304
|
bigquery.create_table_if_not_exists(task['table'])
|
314
305
|
when 'replace'
|
315
306
|
bigquery.create_table_if_not_exists(task['temp_table'])
|
316
|
-
bigquery.create_table_if_not_exists(task['table'])
|
307
|
+
bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
|
317
308
|
when 'append'
|
318
309
|
bigquery.create_table_if_not_exists(task['temp_table'])
|
319
|
-
bigquery.create_table_if_not_exists(task['table'])
|
310
|
+
bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
|
320
311
|
when 'replace_backup'
|
321
312
|
bigquery.create_table_if_not_exists(task['temp_table'])
|
322
313
|
bigquery.create_table_if_not_exists(task['table'])
|
323
|
-
bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old'])
|
314
|
+
bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old']) # needs for when a partition
|
324
315
|
else # append_direct
|
325
316
|
if task['auto_create_table']
|
326
317
|
bigquery.create_table_if_not_exists(task['table'])
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'googleauth'
|
2
|
+
|
3
|
+
module Embulk
|
4
|
+
module Output
|
5
|
+
class Bigquery < OutputPlugin
|
6
|
+
class Auth
|
7
|
+
|
8
|
+
attr_reader :auth_method, :json_key, :scope
|
9
|
+
|
10
|
+
def initialize(task, scope)
|
11
|
+
@auth_method = task['auth_method']
|
12
|
+
@json_key = task['json_keyfile']
|
13
|
+
@scope = scope
|
14
|
+
end
|
15
|
+
|
16
|
+
def authenticate
|
17
|
+
case auth_method
|
18
|
+
when 'authorized_user'
|
19
|
+
key = StringIO.new(json_key)
|
20
|
+
return Google::Auth::UserRefreshCredentials.make_creds(json_key_io: key, scope: scope)
|
21
|
+
when 'compute_engine'
|
22
|
+
return Google::Auth::GCECredentials.new
|
23
|
+
when 'service_account', 'json_key' # json_key is for backward compatibility
|
24
|
+
key = StringIO.new(json_key)
|
25
|
+
return Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
|
26
|
+
when 'application_default'
|
27
|
+
return Google::Auth.get_application_default([scope])
|
28
|
+
else
|
29
|
+
raise ConfigError.new("Unknown auth method: #{auth_method}")
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require_relative 'auth'
|
2
2
|
|
3
3
|
module Embulk
|
4
4
|
module Output
|
@@ -14,6 +14,7 @@ module Embulk
|
|
14
14
|
def initialize(task, scope, client_class)
|
15
15
|
@task = task
|
16
16
|
@scope = scope
|
17
|
+
@auth = Auth.new(task, scope)
|
17
18
|
@client_class = client_class
|
18
19
|
end
|
19
20
|
|
@@ -37,39 +38,7 @@ module Embulk
|
|
37
38
|
Embulk.logger.debug { "embulk-output-bigquery: client_options: #{client.client_options.to_h}" }
|
38
39
|
Embulk.logger.debug { "embulk-output-bigquery: request_options: #{client.request_options.to_h}" }
|
39
40
|
|
40
|
-
|
41
|
-
when 'private_key'
|
42
|
-
private_key_passphrase = 'notasecret'
|
43
|
-
key = Google::APIClient::KeyUtils.load_from_pkcs12(@task['p12_keyfile'], private_key_passphrase)
|
44
|
-
auth = Signet::OAuth2::Client.new(
|
45
|
-
token_credential_uri: "https://accounts.google.com/o/oauth2/token",
|
46
|
-
audience: "https://accounts.google.com/o/oauth2/token",
|
47
|
-
scope: @scope,
|
48
|
-
issuer: @task['service_account_email'],
|
49
|
-
signing_key: key)
|
50
|
-
|
51
|
-
when 'compute_engine'
|
52
|
-
auth = Google::Auth::GCECredentials.new
|
53
|
-
|
54
|
-
when 'json_key'
|
55
|
-
json_key = @task['json_keyfile']
|
56
|
-
if File.exist?(json_key)
|
57
|
-
auth = File.open(json_key) do |f|
|
58
|
-
Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: @scope)
|
59
|
-
end
|
60
|
-
else
|
61
|
-
key = StringIO.new(json_key)
|
62
|
-
auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: @scope)
|
63
|
-
end
|
64
|
-
|
65
|
-
when 'application_default'
|
66
|
-
auth = Google::Auth.get_application_default([@scope])
|
67
|
-
|
68
|
-
else
|
69
|
-
raise ConfigError, "Unknown auth method: #{@task['auth_method']}"
|
70
|
-
end
|
71
|
-
|
72
|
-
client.authorization = auth
|
41
|
+
client.authorization = @auth.authenticate
|
73
42
|
|
74
43
|
@cached_client_expiration = Time.now + 1800
|
75
44
|
@cached_client = client
|