embulk-output-bigquery 0.6.3 → 0.6.7
- checksums.yaml +5 -5
- data/CHANGELOG.md +17 -0
- data/Gemfile +1 -1
- data/README.md +11 -3
- data/embulk-output-bigquery.gemspec +8 -3
- data/lib/embulk/output/bigquery/bigquery_client.rb +35 -29
- data/lib/embulk/output/bigquery/gcs_client.rb +7 -6
- data/lib/embulk/output/bigquery/google_client.rb +3 -3
- data/lib/embulk/output/bigquery/value_converter_factory.rb +19 -0
- data/lib/embulk/output/bigquery.rb +13 -5
- data/test/test_bigquery_client.rb +1 -0
- data/test/test_configure.rb +11 -0
- data/test/test_helper.rb +4 -1
- data/test/test_value_converter_factory.rb +45 -1
- metadata +50 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA256:
-  metadata.gz: …
-  data.tar.gz: …
+SHA256:
+  metadata.gz: 5ad08405eb0d6f4a7ff867c6252d393628437d55e03fe33ec2cc71f940926153
+  data.tar.gz: 431ce1cf5298d5ec114a66191c9847f34fd554cc5de7c12fca8950a9e59538bf
 SHA512:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: c23e79e1b7e6d7b2af8455e97fd58bb4c4bc5e7b7fb1809bd49642294adfe644c27beca55177cff8427f5136532accae3973db1b000660c96f532e6d1cbfbc09
+  data.tar.gz: 2d6ee9d0cf3504683bc2898df3edc246401a2cfd44db2f6311368903e89e09e9b31e746232005fdbbf43f039b91ca1fe1b4f190704ceee3e7a9778962de5be1b
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
+## 0.6.7 - 2021-09-10
+* [enhancement] Add an expiration option of temporary table to clean up (thanks to @TKNGUE)
+
+## 0.6.6 - 2021-06-10
+
+* [maintenance] Fix network retry function (thanks to @case-k-git)
+* [enhancement] Allow to specify the billing project and the project to which the data will be loaded separately (thanks to @ck-fm0211)
+* [enhancement] Include original error message on json parse error (thanks to @k-yomo)
+
+## 0.6.5 - 2021-06-10
+* [maintenance] Fix failed tests (thanks to @kyoshidajp)
+* [maintenance] Lock representable version for avoiding requiring Ruby 2.4 (thanks to @hiroyuki-sato)
+
+## 0.6.4 - 2019-11-06
+
+* [enhancement] Add DATETIME type converter (thanks to @kekekenta)
+
 ## 0.6.3 - 2019-10-28
 
 * [enhancement] Add DATE type converter (thanks to @tksfjt1024)
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -33,6 +33,7 @@ OAuth flow for installed applications.
 | auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
 | json_keyfile | string | optional | | keyfile path or `content` |
 | project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
+| destination_project | string | optional | `project` value | A destination project to which the data will be loaded. Use this if you want to separate a billing project (the `project` value) and a destination project (the `destination_project` value). |
 | dataset | string | required | | dataset |
 | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
@@ -78,6 +79,13 @@ Options for intermediate local files
 | delete_from_local_when_job_end | boolean | optional | true | If set to true, delete generated local files when the job ends |
 | compression | string | optional | "NONE" | Compression of local files (`GZIP` or `NONE`) |
 
+
+Options for intermediate tables on BigQuery
+
+| name | type | required? | default | description |
+|:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
+| temporary_table_expiration | integer | optional | | Temporary table's expiration time in seconds |
+
 `source_format` is also used to determine formatter (csv or jsonl).
 
 #### Same options of bq command-line tools or BigQuery job's property
@@ -307,12 +315,12 @@ Column options are used to aid guessing BigQuery schema, or to define conversion
 
 - **column_options**: advanced: an array of options for columns
   - **name**: column name
-  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE`, and `RECORD`. See below for supported conversion types.
+  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, and `RECORD`. See below for supported conversion types.
     - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
     - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
     - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
-    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE`, `RECORD` (default: `STRING`)
-    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE` (default: `TIMESTAMP`)
+    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, `RECORD` (default: `STRING`)
+    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE` (default: `TIMESTAMP`)
     - json: `STRING`, `RECORD` (default: `STRING`)
   - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
   - **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.6.3"
+  spec.version = "0.6.7"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -16,11 +16,16 @@ Gem::Specification.new do |spec|
 
   # TODO
   # signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
-  # Embulk 0.9 use JRuby 9.1.X.Y and …
-  # So, …
+  # Embulk 0.9 use JRuby 9.1.X.Y and it's compatible with Ruby 2.3.
+  # So, force install signet < 0.12 and google-api-client < 0.33.0
+  # Also, representable version >= 3.1.0 requires Ruby version >= 2.4
   spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
   spec.add_dependency 'google-api-client','< 0.33.0'
   spec.add_dependency 'time_with_zone'
+  spec.add_dependency "representable", ['~> 3.0.0', '< 3.1']
+  # faraday 1.1.0 require >= Ruby 2.4.
+  # googleauth 0.9.0 requires faraday ~> 0.12
+  spec.add_dependency "faraday", '~> 0.12'
 
   spec.add_development_dependency 'bundler', ['>= 1.10.6']
   spec.add_development_dependency 'rake', ['>= 10.0']
|
@@ -18,6 +18,7 @@ module Embulk
|
|
18
18
|
@schema = schema
|
19
19
|
reset_fields(fields) if fields
|
20
20
|
@project = @task['project']
|
21
|
+
@destination_project = @task['destination_project']
|
21
22
|
@dataset = @task['dataset']
|
22
23
|
@location = @task['location']
|
23
24
|
@location_for_log = @location.nil? ? 'us/eu' : @location
|
@@ -80,7 +81,7 @@ module Embulk
|
|
80
81
|
# As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
|
81
82
|
# we should generate job_id in client code, otherwise, retrying would cause duplication
|
82
83
|
job_id = "embulk_load_job_#{SecureRandom.uuid}"
|
83
|
-
Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@
|
84
|
+
Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }
|
84
85
|
|
85
86
|
body = {
|
86
87
|
job_reference: {
|
@@ -90,7 +91,7 @@ module Embulk
|
|
90
91
|
configuration: {
|
91
92
|
load: {
|
92
93
|
destination_table: {
|
93
|
-
project_id: @
|
94
|
+
project_id: @destination_project,
|
94
95
|
dataset_id: @dataset,
|
95
96
|
table_id: table,
|
96
97
|
},
|
@@ -130,7 +131,7 @@ module Embulk
|
|
130
131
|
Embulk.logger.error {
|
131
132
|
"embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
|
132
133
|
}
|
133
|
-
raise Error, "failed to load #{object_uris} to #{@
|
134
|
+
raise Error, "failed to load #{object_uris} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
|
134
135
|
end
|
135
136
|
end
|
136
137
|
end
|
@@ -171,7 +172,7 @@ module Embulk
|
|
171
172
|
# As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
|
172
173
|
# we should generate job_id in client code, otherwise, retrying would cause duplication
|
173
174
|
job_id = "embulk_load_job_#{SecureRandom.uuid}"
|
174
|
-
Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@
|
175
|
+
Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }
|
175
176
|
else
|
176
177
|
Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
|
177
178
|
return
|
@@ -185,7 +186,7 @@ module Embulk
|
|
185
186
|
configuration: {
|
186
187
|
load: {
|
187
188
|
destination_table: {
|
188
|
-
project_id: @
|
189
|
+
project_id: @destination_project,
|
189
190
|
dataset_id: @dataset,
|
190
191
|
table_id: table,
|
191
192
|
},
|
@@ -232,7 +233,7 @@ module Embulk
|
|
232
233
|
Embulk.logger.error {
|
233
234
|
"embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
|
234
235
|
}
|
235
|
-
raise Error, "failed to load #{path} to #{@
|
236
|
+
raise Error, "failed to load #{path} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
|
236
237
|
end
|
237
238
|
end
|
238
239
|
end
|
@@ -245,7 +246,7 @@ module Embulk
|
|
245
246
|
|
246
247
|
Embulk.logger.info {
|
247
248
|
"embulk-output-bigquery: Copy job starting... job_id:[#{job_id}] " \
|
248
|
-
"#{@
|
249
|
+
"#{@destination_project}:#{@dataset}.#{source_table} => #{@destination_project}:#{destination_dataset}.#{destination_table}"
|
249
250
|
}
|
250
251
|
|
251
252
|
body = {
|
@@ -258,12 +259,12 @@ module Embulk
|
|
258
259
|
create_deposition: 'CREATE_IF_NEEDED',
|
259
260
|
write_disposition: write_disposition,
|
260
261
|
source_table: {
|
261
|
-
project_id: @
|
262
|
+
project_id: @destination_project,
|
262
263
|
dataset_id: @dataset,
|
263
264
|
table_id: source_table,
|
264
265
|
},
|
265
266
|
destination_table: {
|
266
|
-
project_id: @
|
267
|
+
project_id: @destination_project,
|
267
268
|
dataset_id: destination_dataset,
|
268
269
|
table_id: destination_table,
|
269
270
|
},
|
@@ -284,8 +285,8 @@ module Embulk
|
|
284
285
|
Embulk.logger.error {
|
285
286
|
"embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
|
286
287
|
}
|
287
|
-
raise Error, "failed to copy #{@
|
288
|
-
"to #{@
|
288
|
+
raise Error, "failed to copy #{@destination_project}:#{@dataset}.#{source_table} " \
|
289
|
+
"to #{@destination_project}:#{destination_dataset}.#{destination_table}, response:#{response}"
|
289
290
|
end
|
290
291
|
end
|
291
292
|
end
|
@@ -354,7 +355,7 @@ module Embulk
|
|
354
355
|
def create_dataset(dataset = nil, reference: nil)
|
355
356
|
dataset ||= @dataset
|
356
357
|
begin
|
357
|
-
Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@
|
358
|
+
Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@destination_project}:#{dataset} in #{@location_for_log}" }
|
358
359
|
hint = {}
|
359
360
|
if reference
|
360
361
|
response = get_dataset(reference)
|
@@ -382,25 +383,25 @@ module Embulk
|
|
382
383
|
Embulk.logger.error {
|
383
384
|
"embulk-output-bigquery: insert_dataset(#{@project}, #{body}, #{opts}), response:#{response}"
|
384
385
|
}
|
385
|
-
raise Error, "failed to create dataset #{@
|
386
|
+
raise Error, "failed to create dataset #{@destination_project}:#{dataset} in #{@location_for_log}, response:#{response}"
|
386
387
|
end
|
387
388
|
end
|
388
389
|
|
389
390
|
def get_dataset(dataset = nil)
|
390
391
|
dataset ||= @dataset
|
391
392
|
begin
|
392
|
-
Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@
|
393
|
-
with_network_retry { client.get_dataset(@
|
393
|
+
Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@destination_project}:#{dataset}" }
|
394
|
+
with_network_retry { client.get_dataset(@destination_project, dataset) }
|
394
395
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
395
396
|
if e.status_code == 404
|
396
|
-
raise NotFoundError, "Dataset #{@
|
397
|
+
raise NotFoundError, "Dataset #{@destination_project}:#{dataset} is not found"
|
397
398
|
end
|
398
399
|
|
399
400
|
response = {status_code: e.status_code, message: e.message, error_class: e.class}
|
400
401
|
Embulk.logger.error {
|
401
|
-
"embulk-output-bigquery: get_dataset(#{@
|
402
|
+
"embulk-output-bigquery: get_dataset(#{@destination_project}, #{dataset}), response:#{response}"
|
402
403
|
}
|
403
|
-
raise Error, "failed to get dataset #{@
|
404
|
+
raise Error, "failed to get dataset #{@destination_project}:#{dataset}, response:#{response}"
|
404
405
|
end
|
405
406
|
end
|
406
407
|
|
@@ -414,7 +415,7 @@ module Embulk
|
|
414
415
|
table = Helper.chomp_partition_decorator(table)
|
415
416
|
end
|
416
417
|
|
417
|
-
Embulk.logger.info { "embulk-output-bigquery: Create table... #{@
|
418
|
+
Embulk.logger.info { "embulk-output-bigquery: Create table... #{@destination_project}:#{dataset}.#{table}" }
|
418
419
|
body = {
|
419
420
|
table_reference: {
|
420
421
|
table_id: table,
|
@@ -439,6 +440,11 @@ module Embulk
|
|
439
440
|
}
|
440
441
|
end
|
441
442
|
|
443
|
+
if options['expiration_time']
|
444
|
+
# expiration_time is expressed in milliseconds
|
445
|
+
body[:expiration_time] = (Time.now.to_i + options['expiration_time']) * 1000
|
446
|
+
end
|
447
|
+
|
442
448
|
opts = {}
|
443
449
|
Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
|
444
450
|
with_network_retry { client.insert_table(@project, dataset, body, opts) }
|
@@ -452,7 +458,7 @@ module Embulk
|
|
452
458
|
Embulk.logger.error {
|
453
459
|
"embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
|
454
460
|
}
|
455
|
-
raise Error, "failed to create table #{@
|
461
|
+
raise Error, "failed to create table #{@destination_project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
|
456
462
|
end
|
457
463
|
end
|
458
464
|
|
@@ -469,8 +475,8 @@ module Embulk
|
|
469
475
|
def delete_table_or_partition(table, dataset: nil)
|
470
476
|
begin
|
471
477
|
dataset ||= @dataset
|
472
|
-
Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@
|
473
|
-
with_network_retry { client.delete_table(@
|
478
|
+
Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@destination_project}:#{dataset}.#{table}" }
|
479
|
+
with_network_retry { client.delete_table(@destination_project, dataset, table) }
|
474
480
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
475
481
|
if e.status_code == 404 && /Not found:/ =~ e.message
|
476
482
|
# ignore 'Not Found' error
|
@@ -479,9 +485,9 @@ module Embulk
|
|
479
485
|
|
480
486
|
response = {status_code: e.status_code, message: e.message, error_class: e.class}
|
481
487
|
Embulk.logger.error {
|
482
|
-
"embulk-output-bigquery: delete_table(#{@
|
488
|
+
"embulk-output-bigquery: delete_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
|
483
489
|
}
|
484
|
-
raise Error, "failed to delete table #{@
|
490
|
+
raise Error, "failed to delete table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
|
485
491
|
end
|
486
492
|
end
|
487
493
|
|
@@ -497,18 +503,18 @@ module Embulk
|
|
497
503
|
def get_table_or_partition(table, dataset: nil)
|
498
504
|
begin
|
499
505
|
dataset ||= @dataset
|
500
|
-
Embulk.logger.info { "embulk-output-bigquery: Get table... #{@
|
501
|
-
with_network_retry { client.get_table(@
|
506
|
+
Embulk.logger.info { "embulk-output-bigquery: Get table... #{@destination_project}:#{dataset}.#{table}" }
|
507
|
+
with_network_retry { client.get_table(@destination_project, dataset, table) }
|
502
508
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
503
509
|
if e.status_code == 404
|
504
|
-
raise NotFoundError, "Table #{@
|
510
|
+
raise NotFoundError, "Table #{@destination_project}:#{dataset}.#{table} is not found"
|
505
511
|
end
|
506
512
|
|
507
513
|
response = {status_code: e.status_code, message: e.message, error_class: e.class}
|
508
514
|
Embulk.logger.error {
|
509
|
-
"embulk-output-bigquery: get_table(#{@
|
515
|
+
"embulk-output-bigquery: get_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
|
510
516
|
}
|
511
|
-
raise Error, "failed to get table #{@
|
517
|
+
raise Error, "failed to get table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
|
512
518
|
end
|
513
519
|
end
|
514
520
|
end
|
data/lib/embulk/output/bigquery/gcs_client.rb
CHANGED
@@ -16,6 +16,7 @@ module Embulk
         super(task, scope, client_class)
 
         @project = @task['project']
+        @destination_project = @task['destination_project']
         @bucket = @task['gcs_bucket']
         @location = @task['location']
       end
@@ -23,7 +24,7 @@ module Embulk
       def insert_temporary_bucket(bucket = nil)
         bucket ||= @bucket
         begin
-          Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@project}:#{bucket}" }
+          Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@destination_project}:#{bucket}" }
           body = {
             name: bucket,
             lifecycle: {
@@ -57,7 +58,7 @@ module Embulk
           Embulk.logger.error {
             "embulk-output-bigquery: insert_temporary_bucket(#{@project}, #{body}, #{opts}), response:#{response}"
           }
-          raise Error, "failed to insert bucket #{@project}:#{bucket}, response:#{response}"
+          raise Error, "failed to insert bucket #{@destination_project}:#{bucket}, response:#{response}"
         end
       end
 
@@ -69,7 +70,7 @@ module Embulk
 
         started = Time.now
         begin
-          Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@project}:#{object_uri}" }
+          Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@destination_project}:#{object_uri}" }
           body = {
             name: object,
           }
@@ -86,7 +87,7 @@ module Embulk
           Embulk.logger.error {
             "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts}), response:#{response}"
           }
-          raise Error, "failed to insert object #{@project}:#{object_uri}, response:#{response}"
+          raise Error, "failed to insert object #{@destination_project}:#{object_uri}, response:#{response}"
         end
       end
 
@@ -109,7 +110,7 @@ module Embulk
         object = object.start_with?('/') ? object[1..-1] : object
         object_uri = URI.join("gs://#{bucket}", object).to_s
         begin
-          Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@project}:#{object_uri}" }
+          Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@destination_project}:#{object_uri}" }
           opts = {}
 
           Embulk.logger.debug { "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts})" }
@@ -122,7 +123,7 @@ module Embulk
           Embulk.logger.error {
             "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts}), response:#{response}"
           }
-          raise Error, "failed to delete object #{@project}:#{object_uri}, response:#{response}"
+          raise Error, "failed to delete object #{@destination_project}:#{object_uri}, response:#{response}"
         end
       end
     end
data/lib/embulk/output/bigquery/google_client.rb
CHANGED
@@ -50,7 +50,9 @@ module Embulk
         begin
           yield
         rescue ::Java::Java.net.SocketException, ::Java::Java.net.ConnectException => e
-          if ['Broken pipe', 'Connection reset', 'Connection timed out'].include?(e.message)
+          if ['Broken pipe', 'Connection reset', 'Connection timed out'].select { |x| e.message.include?(x) }.empty?
+            raise e
+          else
             if retries < @task['retries']
               retries += 1
               Embulk.logger.warn { "embulk-output-bigquery: retry \##{retries}, #{e.class} #{e.message}" }
@@ -59,8 +61,6 @@ module Embulk
               Embulk.logger.error { "embulk-output-bigquery: retry exhausted \##{retries}, #{e.class} #{e.message}" }
               raise e
             end
-          else
-            raise e
           end
         end
       end
data/lib/embulk/output/bigquery/value_converter_factory.rb
CHANGED
@@ -210,6 +210,20 @@ module Embulk
             TimeWithZone.set_zone_offset(Time.parse(val), zone_offset).strftime("%Y-%m-%d")
           end
         }
+      when 'DATETIME'
+        if @timestamp_format
+          Proc.new {|val|
+            next nil if val.nil?
+            with_typecast_error(val) do |val|
+              Time.strptime(val, @timestamp_format).strftime("%Y-%m-%d %H:%M:%S.%6N")
+            end
+          }
+        else
+          Proc.new {|val|
+            next nil if val.nil?
+            val # Users must care of BQ timestamp format
+          }
+        end
       when 'RECORD'
         Proc.new {|val|
           next nil if val.nil?
@@ -252,6 +266,11 @@ module Embulk
           next nil if val.nil?
           val.localtime(zone_offset).strftime("%Y-%m-%d")
         }
+      when 'DATETIME'
+        Proc.new {|val|
+          next nil if val.nil?
+          val.localtime(zone_offset).strftime("%Y-%m-%d %H:%M:%S.%6N")
+        }
       else
         raise NotSupportedType, "cannot take column type #{type} for timestamp column"
       end
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -36,6 +36,7 @@ module Embulk
           'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
           'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
           'project' => config.param('project', :string, :default => nil),
+          'destination_project' => config.param('destination_project', :string, :default => nil),
           'dataset' => config.param('dataset', :string),
           'location' => config.param('location', :string, :default => nil),
           'table' => config.param('table', :string),
@@ -89,6 +90,8 @@ module Embulk
           'clustering' => config.param('clustering', :hash, :default => nil), # google-api-ruby-client >= v0.21.0
           'schema_update_options' => config.param('schema_update_options', :array, :default => nil),
 
+          'temporary_table_expiration' => config.param('temporary_table_expiration', :integer, :default => nil),
+
           # for debug
           'skip_load' => config.param('skip_load', :bool, :default => false),
           'temp_table' => config.param('temp_table', :string, :default => nil),
@@ -135,12 +138,13 @@ module Embulk
             json_key = JSON.parse(task['json_keyfile'])
             task['project'] ||= json_key['project_id']
           rescue => e
-            raise ConfigError.new "json_keyfile …"
+            raise ConfigError.new "Parsing 'json_keyfile' failed with error: #{e.class} #{e.message}"
           end
         end
         if task['project'].nil?
           raise ConfigError.new "Required field \"project\" is not set"
         end
+        task['destination_project'] ||= task['project']
 
         if (task['payload_column'] or task['payload_column_index']) and task['auto_create_table']
           if task['schema_file'].nil? and task['template_table'].nil?
@@ -166,7 +170,7 @@ module Embulk
           begin
             JSON.parse(File.read(task['schema_file']))
           rescue => e
-            raise ConfigError.new "schema_file #{task['schema_file']} …"
+            raise ConfigError.new "Parsing 'schema_file' #{task['schema_file']} failed with error: #{e.class} #{e.message}"
           end
         end
 
@@ -298,19 +302,23 @@ module Embulk
           end
         end
 
+        temp_table_expiration = task['temporary_table_expiration']
+        temp_options = {'expiration_time' => temp_table_expiration}
+
         case task['mode']
         when 'delete_in_advance'
           bigquery.delete_table_or_partition(task['table'])
           bigquery.create_table_if_not_exists(task['table'])
         when 'replace'
-          bigquery.create_table_if_not_exists(task['temp_table'])
+          bigquery.create_table_if_not_exists(task['temp_table'], options: temp_options)
           bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
         when 'append'
-          bigquery.create_table_if_not_exists(task['temp_table'])
+          bigquery.create_table_if_not_exists(task['temp_table'], options: temp_options)
           bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
         when 'replace_backup'
-          bigquery.create_table_if_not_exists(task['temp_table'])
+          bigquery.create_table_if_not_exists(task['temp_table'], options: temp_options)
           bigquery.create_table_if_not_exists(task['table'])
+
           bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old']) # needs for when a partition
         else # append_direct
           if task['auto_create_table']
data/test/test_bigquery_client.rb
CHANGED
@@ -29,6 +29,7 @@ else
     def least_task
       {
         'project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
+        'destination_project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
         'dataset' => 'your_dataset_name',
         'table' => 'your_table_name',
         'auth_method' => 'json_key',
data/test/test_configure.rb
CHANGED
@@ -45,6 +45,7 @@ module Embulk
       assert_equal "application_default", task['auth_method']
       assert_equal nil, task['json_keyfile']
       assert_equal "your_project_name", task['project']
+      assert_equal "your_project_name", task['destination_project']
       assert_equal "your_dataset_name", task['dataset']
       assert_equal nil, task['location']
       assert_equal "your_table_name", task['table']
@@ -284,6 +285,16 @@ module Embulk
       config = least_config.merge('schema_update_options' => ['FOO'])
       assert_raise { Bigquery.configure(config, schema, processor_count) }
     end
+
+    def test_destination_project
+      config = least_config.merge('destination_project' => 'your_destination_project_name')
+      task = Bigquery.configure(config, schema, processor_count)
+
+      assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+      assert_equal 'your_destination_project_name', task['destination_project']
+      assert_equal 'your_project_name', task['project']
+    end
+
   end
 end
 end
data/test/test_helper.rb
CHANGED
@@ -63,7 +63,8 @@ module Embulk
         Column.new({index: 3, name: 'string', type: :string}),
         Column.new({index: 4, name: 'timestamp', type: :timestamp}),
         Column.new({index: 5, name: 'date', type: :timestamp}),
-        Column.new({index: 6, name: 'json', type: :json}),
+        Column.new({index: 6, name: 'datetime', type: :timestamp}),
+        Column.new({index: 7, name: 'json', type: :json}),
       ])
       task = {
         'column_options' => [
@@ -73,6 +74,7 @@ module Embulk
           {'name' => 'string', 'type' => 'INTEGER'},
           {'name' => 'timestamp', 'type' => 'INTEGER'},
           {'name' => 'date', 'type' => 'DATE'},
+          {'name' => 'datetime', 'type' => 'DATETIME'},
           {'name' => 'json', 'type' => 'RECORD', 'fields' => [
             { 'name' => 'key1', 'type' => 'STRING' },
           ]},
@@ -85,6 +87,7 @@ module Embulk
         {name: 'string', type: 'INTEGER'},
         {name: 'timestamp', type: 'INTEGER'},
         {name: 'date', type: 'DATE'},
+        {name: 'datetime', type: 'DATETIME'},
         {name: 'json', type: 'RECORD', fields: [
           {name: 'key1', type: 'STRING'},
         ]},
data/test/test_value_converter_factory.rb
CHANGED
@@ -94,6 +94,10 @@ module Embulk
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
     end
 
+    def test_datetime
+      assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+    end
+
     def test_record
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
     end
@@ -138,6 +142,10 @@ module Embulk
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
     end
 
+    def test_datetime
+      assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+    end
+
     def test_record
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
     end
@@ -178,6 +186,10 @@ module Embulk
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
     end
 
+    def test_datetime
+      assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+    end
+
     def test_record
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
     end
@@ -236,6 +248,20 @@ module Embulk
       assert_raise { converter.call('foo') }
     end
 
+    def test_datetime
+      converter = ValueConverterFactory.new(
+        SCHEMA_TYPE, 'DATETIME',
+        timestamp_format: '%Y/%m/%d'
+      ).create_converter
+      assert_equal nil, converter.call(nil)
+      assert_equal "2016-02-26 00:00:00.000000", converter.call("2016/02/26")
+
+      # Users must care of BQ datetime format by themselves with no timestamp_format
+      converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
+      assert_equal nil, converter.call(nil)
+      assert_equal "2016-02-26 00:00:00", converter.call("2016-02-26 00:00:00")
+    end
+
     def test_record
       converter = ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter
       assert_equal({'foo'=>'foo'}, converter.call(%Q[{"foo":"foo"}]))
@@ -294,7 +320,7 @@ module Embulk
       timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
       expected = "2016-02-26"
       assert_equal expected, converter.call(timestamp)
-
+
       converter = ValueConverterFactory.new(
         SCHEMA_TYPE, 'DATE', timezone: 'Asia/Tokyo'
       ).create_converter
@@ -306,6 +332,24 @@ module Embulk
       assert_raise { converter.call('foo') }
     end
 
+    def test_datetime
+      converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
+      assert_equal nil, converter.call(nil)
+      timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
+      expected = "2016-02-26 00:00:00.500000"
+      assert_equal expected, converter.call(timestamp)
+
+      converter = ValueConverterFactory.new(
+        SCHEMA_TYPE, 'DATETIME', timezone: 'Asia/Tokyo'
+      ).create_converter
+      assert_equal nil, converter.call(nil)
+      timestamp = Time.parse("2016-02-25 15:00:00.500000 +00:00")
+      expected = "2016-02-26 00:00:00.500000"
+      assert_equal expected, converter.call(timestamp)
+
+      assert_raise { converter.call('foo') }
+    end
+
     def test_record
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
     end
metadata
CHANGED
@@ -1,17 +1,18 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.6.3
+  version: 0.6.7
 platform: ruby
 authors:
 - Satoshi Akama
 - Naotoshi Seo
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2021-09-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
+  name: signet
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
@@ -20,9 +21,8 @@ dependencies:
     - - "<"
       - !ruby/object:Gem::Version
         version: 0.12.0
-  name: signet
-  prerelease: false
   type: :runtime
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
@@ -32,56 +32,90 @@ dependencies:
       - !ruby/object:Gem::Version
         version: 0.12.0
 - !ruby/object:Gem::Dependency
+  name: google-api-client
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "<"
       - !ruby/object:Gem::Version
         version: 0.33.0
-  name: google-api-client
-  prerelease: false
   type: :runtime
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "<"
       - !ruby/object:Gem::Version
         version: 0.33.0
 - !ruby/object:Gem::Dependency
+  name: time_with_zone
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-  name: time_with_zone
-  prerelease: false
   type: :runtime
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
+  name: representable
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '3.1'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '3.1'
+- !ruby/object:Gem::Dependency
+  name: faraday
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.12'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.12'
+- !ruby/object:Gem::Dependency
+  name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
        version: 1.10.6
-  name: bundler
-  prerelease: false
   type: :development
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
         version: 1.10.6
 - !ruby/object:Gem::Dependency
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
         version: '10.0'
-  name: rake
-  prerelease: false
   type: :development
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -123,7 +157,7 @@ homepage: https://github.com/embulk/embulk-output-bigquery
 licenses:
 - MIT
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -138,9 +172,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-
-
-signing_key:
+rubygems_version: 3.0.3
+signing_key:
 specification_version: 4
 summary: Google BigQuery output plugin for Embulk
 test_files: