embulk-output-bigquery 0.6.3 → 0.6.7
- checksums.yaml +5 -5
- data/CHANGELOG.md +17 -0
- data/Gemfile +1 -1
- data/README.md +11 -3
- data/embulk-output-bigquery.gemspec +8 -3
- data/lib/embulk/output/bigquery/bigquery_client.rb +35 -29
- data/lib/embulk/output/bigquery/gcs_client.rb +7 -6
- data/lib/embulk/output/bigquery/google_client.rb +3 -3
- data/lib/embulk/output/bigquery/value_converter_factory.rb +19 -0
- data/lib/embulk/output/bigquery.rb +13 -5
- data/test/test_bigquery_client.rb +1 -0
- data/test/test_configure.rb +11 -0
- data/test/test_helper.rb +4 -1
- data/test/test_value_converter_factory.rb +45 -1
- metadata +50 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA256:
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: 5ad08405eb0d6f4a7ff867c6252d393628437d55e03fe33ec2cc71f940926153
+  data.tar.gz: 431ce1cf5298d5ec114a66191c9847f34fd554cc5de7c12fca8950a9e59538bf
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c23e79e1b7e6d7b2af8455e97fd58bb4c4bc5e7b7fb1809bd49642294adfe644c27beca55177cff8427f5136532accae3973db1b000660c96f532e6d1cbfbc09
+  data.tar.gz: 2d6ee9d0cf3504683bc2898df3edc246401a2cfd44db2f6311368903e89e09e9b31e746232005fdbbf43f039b91ca1fe1b4f190704ceee3e7a9778962de5be1b
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
+## 0.6.7 - 2021-09-10
+* [enhancement] Add an expiration option of temporary table to clean up (thanks to @TKNGUE)
+
+## 0.6.6 - 2021-06-10
+
+* [maintenance] Fix network retry function (thanks to @case-k-git)
+* [enhancement] Allow to specify the billing project and the project to which the data will be loaded separately (thanks to @ck-fm0211)
+* [enhancement] Include original error message on json parse error (thanks to @k-yomo)
+
+## 0.6.5 - 2021-06-10
+* [maintenance] Fix failed tests (thanks to @kyoshidajp)
+* [maintenance] Lock representable version for avoiding requiring Ruby 2.4 (thanks to @hiroyuki-sato)
+
+## 0.6.4 - 2019-11-06
+
+* [enhancement] Add DATETIME type conveter (thanks to @kekekenta)
+
 ## 0.6.3 - 2019-10-28
 
 * [enhancement] Add DATE type conveter (thanks to @tksfjt1024)
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -33,6 +33,7 @@ OAuth flow for installed applications.
 | auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
 | json_keyfile | string | optional | | keyfile path or `content` |
 | project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
+| destination_project | string | optional | `project` value | A destination project to which the data will be loaded. Use this if you want to separate a billing project (the `project` value) and a destination project (the `destination_project` value). |
 | dataset | string | required | | dataset |
 | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
@@ -78,6 +79,13 @@ Options for intermediate local files
 | delete_from_local_when_job_end | boolean | optional | true | If set to true, delete generate local files when job is end |
 | compression | string | optional | "NONE" | Compression of local files (`GZIP` or `NONE`) |
 
+
+Options for intermediate tables on BigQuery
+
+| name | type | required? | default | description |
+|:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
+| temporary_table_expiration | integer | optional | | Temporary table's expiration time in seconds |
+
 `source_format` is also used to determine formatter (csv or jsonl).
 
 #### Same options of bq command-line tools or BigQuery job's property
@@ -307,12 +315,12 @@ Column options are used to aid guessing BigQuery schema, or to define conversion
 
 - **column_options**: advanced: an array of options for columns
   - **name**: column name
-  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE`, and `RECORD`. See belows for supported conversion type.
+  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, and `RECORD`. See belows for supported conversion type.
    - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
    - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
    - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
-   - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE`, `RECORD` (default: `STRING`)
-   - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE` (default: `TIMESTAMP`)
+   - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, `RECORD` (default: `STRING`)
+   - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE` (default: `TIMESTAMP`)
   - json: `STRING`, `RECORD` (default: `STRING`)
 - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
 - **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
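To see how the two options introduced in this release fit together, here is a minimal sketch of a task configuration in the Ruby-hash style used by this gem's own tests. The project IDs, the keyfile path, and the 3600-second expiration are illustrative placeholders, not values taken from the diff:

```ruby
task = {
  'auth_method'                => 'json_key',
  'json_keyfile'               => '/path/to/service_account.json',  # placeholder path
  'project'                    => 'billing-project-id',             # billed for the load jobs
  'destination_project'        => 'warehouse-project-id',           # receives the data; defaults to project
  'dataset'                    => 'your_dataset_name',
  'table'                      => 'your_table_name',
  'temporary_table_expiration' => 3600,                             # temp tables expire an hour after creation
}
```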
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.6.3"
+  spec.version = "0.6.7"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -16,11 +16,16 @@ Gem::Specification.new do |spec|
 
   # TODO
   # signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
-  # Embulk 0.9 use JRuby 9.1.X.Y and
-  # So,
+  # Embulk 0.9 use JRuby 9.1.X.Y and it's compatible with Ruby 2.3.
+  # So, force install signet < 0.12 and google-api-client < 0.33.0
+  # Also, representable version >= 3.1.0 requires Ruby version >= 2.4
   spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
   spec.add_dependency 'google-api-client','< 0.33.0'
   spec.add_dependency 'time_with_zone'
+  spec.add_dependency "representable", ['~> 3.0.0', '< 3.1']
+  # faraday 1.1.0 require >= Ruby 2.4.
+  # googleauth 0.9.0 requires faraday ~> 0.12
+  spec.add_dependency "faraday", '~> 0.12'
 
   spec.add_development_dependency 'bundler', ['>= 1.10.6']
   spec.add_development_dependency 'rake', ['>= 10.0']
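The pins above all follow from Embulk 0.9's JRuby being compatible with Ruby 2.3: signet, google-api-client, representable, and faraday are each held below their first Ruby-2.4-only release. As a quick sanity check, a sketch using RubyGems' own requirement API, where the versions checked are the ones named in the comments:

```ruby
require 'rubygems'

# Each pinned range should exclude the first release that requires Ruby >= 2.4.
Gem::Requirement.new('~> 0.7', '< 0.12.0').satisfied_by?(Gem::Version.new('0.12.0'))  #=> false (signet)
Gem::Requirement.new('< 0.33.0').satisfied_by?(Gem::Version.new('0.33.0'))            #=> false (google-api-client)
Gem::Requirement.new('~> 3.0.0', '< 3.1').satisfied_by?(Gem::Version.new('3.1.0'))    #=> false (representable)
Gem::Requirement.new('~> 0.12').satisfied_by?(Gem::Version.new('1.1.0'))              #=> false (faraday)
```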
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -18,6 +18,7 @@ module Embulk
       @schema = schema
       reset_fields(fields) if fields
       @project = @task['project']
+      @destination_project = @task['destination_project']
       @dataset = @task['dataset']
       @location = @task['location']
       @location_for_log = @location.nil? ? 'us/eu' : @location
@@ -80,7 +81,7 @@ module Embulk
       # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
       # we should generate job_id in client code, otherwise, retrying would cause duplication
       job_id = "embulk_load_job_#{SecureRandom.uuid}"
-      Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
+      Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }
 
       body = {
         job_reference: {
@@ -90,7 +91,7 @@ module Embulk
         configuration: {
           load: {
             destination_table: {
-              project_id: @project,
+              project_id: @destination_project,
               dataset_id: @dataset,
               table_id: table,
             },
@@ -130,7 +131,7 @@ module Embulk
         Embulk.logger.error {
           "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
         }
-        raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
+        raise Error, "failed to load #{object_uris} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
       end
     end
   end
@@ -171,7 +172,7 @@ module Embulk
         # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
         # we should generate job_id in client code, otherwise, retrying would cause duplication
         job_id = "embulk_load_job_#{SecureRandom.uuid}"
-        Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
+        Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }
       else
         Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
         return
@@ -185,7 +186,7 @@ module Embulk
         configuration: {
           load: {
             destination_table: {
-              project_id: @project,
+              project_id: @destination_project,
               dataset_id: @dataset,
               table_id: table,
             },
@@ -232,7 +233,7 @@ module Embulk
         Embulk.logger.error {
           "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
         }
-        raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
+        raise Error, "failed to load #{path} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
       end
     end
   end
@@ -245,7 +246,7 @@ module Embulk
 
       Embulk.logger.info {
         "embulk-output-bigquery: Copy job starting... job_id:[#{job_id}] " \
-        "#{@project}:#{@dataset}.#{source_table} => #{@project}:#{destination_dataset}.#{destination_table}"
+        "#{@destination_project}:#{@dataset}.#{source_table} => #{@destination_project}:#{destination_dataset}.#{destination_table}"
       }
 
       body = {
@@ -258,12 +259,12 @@ module Embulk
           create_deposition: 'CREATE_IF_NEEDED',
           write_disposition: write_disposition,
           source_table: {
-            project_id: @project,
+            project_id: @destination_project,
             dataset_id: @dataset,
             table_id: source_table,
           },
           destination_table: {
-            project_id: @project,
+            project_id: @destination_project,
             dataset_id: destination_dataset,
             table_id: destination_table,
           },
@@ -284,8 +285,8 @@ module Embulk
         Embulk.logger.error {
           "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
         }
-        raise Error, "failed to copy #{@project}:#{@dataset}.#{source_table} " \
-          "to #{@project}:#{destination_dataset}.#{destination_table}, response:#{response}"
+        raise Error, "failed to copy #{@destination_project}:#{@dataset}.#{source_table} " \
+          "to #{@destination_project}:#{destination_dataset}.#{destination_table}, response:#{response}"
       end
     end
   end
@@ -354,7 +355,7 @@ module Embulk
     def create_dataset(dataset = nil, reference: nil)
       dataset ||= @dataset
       begin
-        Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset} in #{@location_for_log}" }
+        Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@destination_project}:#{dataset} in #{@location_for_log}" }
         hint = {}
         if reference
           response = get_dataset(reference)
@@ -382,25 +383,25 @@ module Embulk
         Embulk.logger.error {
           "embulk-output-bigquery: insert_dataset(#{@project}, #{body}, #{opts}), response:#{response}"
         }
-        raise Error, "failed to create dataset #{@project}:#{dataset} in #{@location_for_log}, response:#{response}"
+        raise Error, "failed to create dataset #{@destination_project}:#{dataset} in #{@location_for_log}, response:#{response}"
       end
     end
 
     def get_dataset(dataset = nil)
       dataset ||= @dataset
       begin
-        Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{dataset}" }
-        with_network_retry { client.get_dataset(@project, dataset) }
+        Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@destination_project}:#{dataset}" }
+        with_network_retry { client.get_dataset(@destination_project, dataset) }
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
         if e.status_code == 404
-          raise NotFoundError, "Dataset #{@project}:#{dataset} is not found"
+          raise NotFoundError, "Dataset #{@destination_project}:#{dataset} is not found"
        end
 
         response = {status_code: e.status_code, message: e.message, error_class: e.class}
         Embulk.logger.error {
-          "embulk-output-bigquery: get_dataset(#{@project}, #{dataset}), response:#{response}"
+          "embulk-output-bigquery: get_dataset(#{@destination_project}, #{dataset}), response:#{response}"
         }
-        raise Error, "failed to get dataset #{@project}:#{dataset}, response:#{response}"
+        raise Error, "failed to get dataset #{@destination_project}:#{dataset}, response:#{response}"
       end
     end
 
@@ -414,7 +415,7 @@ module Embulk
         table = Helper.chomp_partition_decorator(table)
       end
 
-      Embulk.logger.info { "embulk-output-bigquery: Create table... #{@project}:#{dataset}.#{table}" }
+      Embulk.logger.info { "embulk-output-bigquery: Create table... #{@destination_project}:#{dataset}.#{table}" }
       body = {
         table_reference: {
           table_id: table,
@@ -439,6 +440,11 @@ module Embulk
         }
       end
 
+      if options['expiration_time']
+        # expiration_time is expressed in milliseconds
+        body[:expiration_time] = (Time.now.to_i + options['expiration_time']) * 1000
+      end
+
      opts = {}
      Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
      with_network_retry { client.insert_table(@project, dataset, body, opts) }
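The `expiration_time` field set in this hunk is an absolute timestamp in epoch milliseconds, while the user-facing `temporary_table_expiration` option is a relative duration in seconds, hence the conversion above. A standalone sketch of the same arithmetic, with 3600 as an assumed example value:

```ruby
temporary_table_expiration = 3600  # seconds from now, as the user configures it

# BigQuery's tables.insert expects expirationTime as milliseconds since the
# epoch, so anchor the relative duration at the current time and scale to ms.
expiration_time_ms = (Time.now.to_i + temporary_table_expiration) * 1000
```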
@@ -452,7 +458,7 @@ module Embulk
       Embulk.logger.error {
         "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
       }
-      raise Error, "failed to create table #{@project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
+      raise Error, "failed to create table #{@destination_project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
     end
   end
 
@@ -469,8 +475,8 @@ module Embulk
     def delete_table_or_partition(table, dataset: nil)
       begin
         dataset ||= @dataset
-        Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@project}:#{dataset}.#{table}" }
-        with_network_retry { client.delete_table(@project, dataset, table) }
+        Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@destination_project}:#{dataset}.#{table}" }
+        with_network_retry { client.delete_table(@destination_project, dataset, table) }
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
         if e.status_code == 404 && /Not found:/ =~ e.message
           # ignore 'Not Found' error
@@ -479,9 +485,9 @@ module Embulk
 
         response = {status_code: e.status_code, message: e.message, error_class: e.class}
         Embulk.logger.error {
-          "embulk-output-bigquery: delete_table(#{@project}, #{dataset}, #{table}), response:#{response}"
+          "embulk-output-bigquery: delete_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
         }
-        raise Error, "failed to delete table #{@project}:#{dataset}.#{table}, response:#{response}"
+        raise Error, "failed to delete table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
       end
     end
 
@@ -497,18 +503,18 @@ module Embulk
     def get_table_or_partition(table, dataset: nil)
       begin
         dataset ||= @dataset
-        Embulk.logger.info { "embulk-output-bigquery: Get table... #{@project}:#{dataset}.#{table}" }
-        with_network_retry { client.get_table(@project, dataset, table) }
+        Embulk.logger.info { "embulk-output-bigquery: Get table... #{@destination_project}:#{dataset}.#{table}" }
+        with_network_retry { client.get_table(@destination_project, dataset, table) }
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
         if e.status_code == 404
-          raise NotFoundError, "Table #{@project}:#{dataset}.#{table} is not found"
+          raise NotFoundError, "Table #{@destination_project}:#{dataset}.#{table} is not found"
         end
 
         response = {status_code: e.status_code, message: e.message, error_class: e.class}
         Embulk.logger.error {
-          "embulk-output-bigquery: get_table(#{@project}, #{dataset}, #{table}), response:#{response}"
+          "embulk-output-bigquery: get_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
         }
-        raise Error, "failed to get table #{@project}:#{dataset}.#{table}, response:#{response}"
+        raise Error, "failed to get table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
       end
     end
   end
data/lib/embulk/output/bigquery/gcs_client.rb
CHANGED
@@ -16,6 +16,7 @@ module Embulk
       super(task, scope, client_class)
 
       @project = @task['project']
+      @destination_project = @task['destination_project']
       @bucket = @task['gcs_bucket']
       @location = @task['location']
     end
@@ -23,7 +24,7 @@ module Embulk
     def insert_temporary_bucket(bucket = nil)
       bucket ||= @bucket
       begin
-        Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@project}:#{bucket}" }
+        Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@destination_project}:#{bucket}" }
         body = {
           name: bucket,
           lifecycle: {
@@ -57,7 +58,7 @@ module Embulk
         Embulk.logger.error {
           "embulk-output-bigquery: insert_temporary_bucket(#{@project}, #{body}, #{opts}), response:#{response}"
         }
-        raise Error, "failed to insert bucket #{@project}:#{bucket}, response:#{response}"
+        raise Error, "failed to insert bucket #{@destination_project}:#{bucket}, response:#{response}"
       end
     end
 
@@ -69,7 +70,7 @@ module Embulk
 
       started = Time.now
       begin
-        Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@project}:#{object_uri}" }
+        Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@destination_project}:#{object_uri}" }
         body = {
           name: object,
         }
@@ -86,7 +87,7 @@ module Embulk
         Embulk.logger.error {
          "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts}), response:#{response}"
         }
-        raise Error, "failed to insert object #{@project}:#{object_uri}, response:#{response}"
+        raise Error, "failed to insert object #{@destination_project}:#{object_uri}, response:#{response}"
       end
     end
 
@@ -109,7 +110,7 @@ module Embulk
       object = object.start_with?('/') ? object[1..-1] : object
       object_uri = URI.join("gs://#{bucket}", object).to_s
       begin
-        Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@project}:#{object_uri}" }
+        Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@destination_project}:#{object_uri}" }
         opts = {}
 
         Embulk.logger.debug { "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts})" }
@@ -122,7 +123,7 @@ module Embulk
         Embulk.logger.error {
           "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts}), response:#{response}"
         }
-        raise Error, "failed to delete object #{@project}:#{object_uri}, response:#{response}"
+        raise Error, "failed to delete object #{@destination_project}:#{object_uri}, response:#{response}"
       end
     end
   end
data/lib/embulk/output/bigquery/google_client.rb
CHANGED
@@ -50,7 +50,9 @@ module Embulk
       begin
         yield
       rescue ::Java::Java.net.SocketException, ::Java::Java.net.ConnectException => e
-        if ['Broken pipe', 'Connection reset', 'Connection timed out'].include?(e.message)
+        if ['Broken pipe', 'Connection reset', 'Connection timed out'].select { |x| e.message.include?(x) }.empty?
+          raise e
+        else
           if retries < @task['retries']
             retries += 1
             Embulk.logger.warn { "embulk-output-bigquery: retry \##{retries}, #{e.class} #{e.message}" }
@@ -59,8 +61,6 @@ module Embulk
             Embulk.logger.error { "embulk-output-bigquery: retry exhausted \##{retries}, #{e.class} #{e.message}" }
             raise e
           end
-        else
-          raise e
         end
       end
     end
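The guard removed above (truncated in this rendering, but most plausibly `include?(e.message)`) tested the whole exception message for exact equality with one of the three phrases, so decorated JVM messages were never retried; the replacement retries whenever any phrase occurs as a substring. A minimal sketch of the difference, with an assumed example message:

```ruby
retryable = ['Broken pipe', 'Connection reset', 'Connection timed out']
message   = 'Broken pipe (Write failed)'  # hypothetical JVM socket error text

retryable.include?(message)                          #=> false: exact match misses the decorated message
retryable.select { |x| message.include?(x) }.empty?  #=> false: a phrase matches, so the error is retried
```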
data/lib/embulk/output/bigquery/value_converter_factory.rb
CHANGED
@@ -210,6 +210,20 @@ module Embulk
             TimeWithZone.set_zone_offset(Time.parse(val), zone_offset).strftime("%Y-%m-%d")
           end
         }
+      when 'DATETIME'
+        if @timestamp_format
+          Proc.new {|val|
+            next nil if val.nil?
+            with_typecast_error(val) do |val|
+              Time.strptime(val, @timestamp_format).strftime("%Y-%m-%d %H:%M:%S.%6N")
+            end
+          }
+        else
+          Proc.new {|val|
+            next nil if val.nil?
+            val # Users must care of BQ timestamp format
+          }
+        end
       when 'RECORD'
         Proc.new {|val|
           next nil if val.nil?
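As a usage sketch of the string-to-`DATETIME` branch just added (the same values appear in the tests at the end of this diff): with a `timestamp_format`, the string is parsed by `Time.strptime` and reserialized with microsecond precision; without one, it passes through untouched.

```ruby
require 'time'

# string column -> DATETIME, with timestamp_format '%Y/%m/%d'
Time.strptime('2016/02/26', '%Y/%m/%d').strftime('%Y-%m-%d %H:%M:%S.%6N')
#=> "2016-02-26 00:00:00.000000"
```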
@@ -252,6 +266,11 @@ module Embulk
           next nil if val.nil?
           val.localtime(zone_offset).strftime("%Y-%m-%d")
         }
+      when 'DATETIME'
+        Proc.new {|val|
+          next nil if val.nil?
+          val.localtime(zone_offset).strftime("%Y-%m-%d %H:%M:%S.%6N")
+        }
       else
         raise NotSupportedType, "cannot take column type #{type} for timestamp column"
       end
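The timestamp-to-`DATETIME` branch renders the value at the configured zone offset with no zone suffix, since BigQuery `DATETIME` values are zone-less. A sketch assuming a `+09:00` offset (in the gem, `zone_offset` is derived from the `timezone` option):

```ruby
require 'time'

val = Time.parse('2016-02-25 15:00:00.500000 +00:00')
val.localtime('+09:00').strftime('%Y-%m-%d %H:%M:%S.%6N')
#=> "2016-02-26 00:00:00.500000"
```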
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -36,6 +36,7 @@ module Embulk
         'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
         'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
         'project' => config.param('project', :string, :default => nil),
+        'destination_project' => config.param('destination_project', :string, :default => nil),
         'dataset' => config.param('dataset', :string),
         'location' => config.param('location', :string, :default => nil),
         'table' => config.param('table', :string),
@@ -89,6 +90,8 @@ module Embulk
         'clustering' => config.param('clustering', :hash, :default => nil), # google-api-ruby-client >= v0.21.0
         'schema_update_options' => config.param('schema_update_options', :array, :default => nil),
 
+        'temporary_table_expiration' => config.param('temporary_table_expiration', :integer, :default => nil),
+
         # for debug
         'skip_load' => config.param('skip_load', :bool, :default => false),
         'temp_table' => config.param('temp_table', :string, :default => nil),
@@ -135,12 +138,13 @@ module Embulk
           json_key = JSON.parse(task['json_keyfile'])
           task['project'] ||= json_key['project_id']
         rescue => e
-          raise ConfigError.new "json_keyfile
+          raise ConfigError.new "Parsing 'json_keyfile' failed with error: #{e.class} #{e.message}"
         end
       end
       if task['project'].nil?
         raise ConfigError.new "Required field \"project\" is not set"
       end
+      task['destination_project'] ||= task['project']
 
       if (task['payload_column'] or task['payload_column_index']) and task['auto_create_table']
         if task['schema_file'].nil? and task['template_table'].nil?
@@ -166,7 +170,7 @@ module Embulk
         begin
           JSON.parse(File.read(task['schema_file']))
         rescue => e
-          raise ConfigError.new "schema_file #{task['schema_file']}
+          raise ConfigError.new "Parsing 'schema_file' #{task['schema_file']} failed with error: #{e.class} #{e.message}"
         end
       end
 
@@ -298,19 +302,23 @@ module Embulk
         end
       end
 
+      temp_table_expiration = task['temporary_table_expiration']
+      temp_options = {'expiration_time' => temp_table_expiration}
+
       case task['mode']
       when 'delete_in_advance'
         bigquery.delete_table_or_partition(task['table'])
         bigquery.create_table_if_not_exists(task['table'])
       when 'replace'
-        bigquery.create_table_if_not_exists(task['temp_table'])
+        bigquery.create_table_if_not_exists(task['temp_table'], options: temp_options)
         bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
       when 'append'
-        bigquery.create_table_if_not_exists(task['temp_table'])
+        bigquery.create_table_if_not_exists(task['temp_table'], options: temp_options)
         bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
       when 'replace_backup'
-        bigquery.create_table_if_not_exists(task['temp_table'])
+        bigquery.create_table_if_not_exists(task['temp_table'], options: temp_options)
         bigquery.create_table_if_not_exists(task['table'])
+
         bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old']) # needs for when a partition
       else # append_direct
         if task['auto_create_table']
data/test/test_bigquery_client.rb
CHANGED
@@ -29,6 +29,7 @@ else
     def least_task
       {
         'project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
+        'destination_project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
         'dataset' => 'your_dataset_name',
         'table' => 'your_table_name',
         'auth_method' => 'json_key',
data/test/test_configure.rb
CHANGED
@@ -45,6 +45,7 @@ module Embulk
       assert_equal "application_default", task['auth_method']
       assert_equal nil, task['json_keyfile']
       assert_equal "your_project_name", task['project']
+      assert_equal "your_project_name", task['destination_project']
       assert_equal "your_dataset_name", task['dataset']
       assert_equal nil, task['location']
       assert_equal "your_table_name", task['table']
@@ -284,6 +285,16 @@ module Embulk
       config = least_config.merge('schema_update_options' => ['FOO'])
       assert_raise { Bigquery.configure(config, schema, processor_count) }
     end
+
+    def test_destination_project
+      config = least_config.merge('destination_project' => 'your_destination_project_name')
+      task = Bigquery.configure(config, schema, processor_count)
+
+      assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+      assert_equal 'your_destination_project_name', task['destination_project']
+      assert_equal 'your_project_name', task['project']
+    end
+
   end
 end
 end
data/test/test_helper.rb
CHANGED
@@ -63,7 +63,8 @@ module Embulk
       Column.new({index: 3, name: 'string', type: :string}),
       Column.new({index: 4, name: 'timestamp', type: :timestamp}),
       Column.new({index: 5, name: 'date', type: :timestamp}),
-      Column.new({index: 6, name: 'json', type: :json}),
+      Column.new({index: 6, name: 'datetime', type: :timestamp}),
+      Column.new({index: 7, name: 'json', type: :json}),
     ])
     task = {
       'column_options' => [
@@ -73,6 +74,7 @@ module Embulk
         {'name' => 'string', 'type' => 'INTEGER'},
         {'name' => 'timestamp', 'type' => 'INTEGER'},
         {'name' => 'date', 'type' => 'DATE'},
+        {'name' => 'datetime', 'type' => 'DATETIME'},
        {'name' => 'json', 'type' => 'RECORD', 'fields' => [
           { 'name' => 'key1', 'type' => 'STRING' },
         ]},
@@ -85,6 +87,7 @@ module Embulk
         {name: 'string', type: 'INTEGER'},
         {name: 'timestamp', type: 'INTEGER'},
         {name: 'date', type: 'DATE'},
+        {name: 'datetime', type: 'DATETIME'},
         {name: 'json', type: 'RECORD', fields: [
           {name: 'key1', type: 'STRING'},
         ]},
data/test/test_value_converter_factory.rb
CHANGED
@@ -94,6 +94,10 @@ module Embulk
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
     end
 
+    def test_datetime
+      assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+    end
+
     def test_record
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
     end
@@ -138,6 +142,10 @@ module Embulk
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
     end
 
+    def test_datetime
+      assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+    end
+
     def test_record
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
     end
@@ -178,6 +186,10 @@ module Embulk
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
     end
 
+    def test_datetime
+      assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+    end
+
     def test_record
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
     end
@@ -236,6 +248,20 @@ module Embulk
       assert_raise { converter.call('foo') }
     end
 
+    def test_datetime
+      converter = ValueConverterFactory.new(
+        SCHEMA_TYPE, 'DATETIME',
+        timestamp_format: '%Y/%m/%d'
+      ).create_converter
+      assert_equal nil, converter.call(nil)
+      assert_equal "2016-02-26 00:00:00.000000", converter.call("2016/02/26")
+
+      # Users must care of BQ datetime format by themselves with no timestamp_format
+      converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
+      assert_equal nil, converter.call(nil)
+      assert_equal "2016-02-26 00:00:00", converter.call("2016-02-26 00:00:00")
+    end
+
     def test_record
       converter = ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter
       assert_equal({'foo'=>'foo'}, converter.call(%Q[{"foo":"foo"}]))
@@ -294,7 +320,7 @@ module Embulk
       timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
       expected = "2016-02-26"
       assert_equal expected, converter.call(timestamp)
-
+
       converter = ValueConverterFactory.new(
         SCHEMA_TYPE, 'DATE', timezone: 'Asia/Tokyo'
       ).create_converter
@@ -306,6 +332,24 @@ module Embulk
       assert_raise { converter.call('foo') }
     end
 
+    def test_datetime
+      converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
+      assert_equal nil, converter.call(nil)
+      timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
+      expected = "2016-02-26 00:00:00.500000"
+      assert_equal expected, converter.call(timestamp)
+
+      converter = ValueConverterFactory.new(
+        SCHEMA_TYPE, 'DATETIME', timezone: 'Asia/Tokyo'
+      ).create_converter
+      assert_equal nil, converter.call(nil)
+      timestamp = Time.parse("2016-02-25 15:00:00.500000 +00:00")
+      expected = "2016-02-26 00:00:00.500000"
+      assert_equal expected, converter.call(timestamp)
+
+      assert_raise { converter.call('foo') }
+    end
+
     def test_record
       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
     end
metadata
CHANGED
@@ -1,17 +1,18 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.6.3
+  version: 0.6.7
 platform: ruby
 authors:
 - Satoshi Akama
 - Naotoshi Seo
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2021-09-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
+  name: signet
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
@@ -20,9 +21,8 @@ dependencies:
     - - "<"
     - !ruby/object:Gem::Version
       version: 0.12.0
-  name: signet
-  prerelease: false
   type: :runtime
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
@@ -32,56 +32,90 @@ dependencies:
     - !ruby/object:Gem::Version
       version: 0.12.0
 - !ruby/object:Gem::Dependency
+  name: google-api-client
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "<"
     - !ruby/object:Gem::Version
       version: 0.33.0
-  name: google-api-client
-  prerelease: false
   type: :runtime
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "<"
     - !ruby/object:Gem::Version
       version: 0.33.0
 - !ruby/object:Gem::Dependency
+  name: time_with_zone
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
     - !ruby/object:Gem::Version
       version: '0'
-  name: time_with_zone
-  prerelease: false
   type: :runtime
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 - !ruby/object:Gem::Dependency
+  name: representable
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+    - !ruby/object:Gem::Version
+      version: 3.0.0
+    - - "<"
+    - !ruby/object:Gem::Version
+      version: '3.1'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+    - !ruby/object:Gem::Version
+      version: 3.0.0
+    - - "<"
+    - !ruby/object:Gem::Version
+      version: '3.1'
+- !ruby/object:Gem::Dependency
+  name: faraday
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+    - !ruby/object:Gem::Version
+      version: '0.12'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+    - !ruby/object:Gem::Version
+      version: '0.12'
+- !ruby/object:Gem::Dependency
+  name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
     - !ruby/object:Gem::Version
       version: 1.10.6
-  name: bundler
-  prerelease: false
   type: :development
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
     - !ruby/object:Gem::Version
       version: 1.10.6
 - !ruby/object:Gem::Dependency
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
     - !ruby/object:Gem::Version
       version: '10.0'
-  name: rake
-  prerelease: false
   type: :development
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -123,7 +157,7 @@ homepage: https://github.com/embulk/embulk-output-bigquery
 licenses:
 - MIT
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -138,9 +172,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-
-
-signing_key:
+rubygems_version: 3.0.3
+signing_key:
 specification_version: 4
 summary: Google BigQuery output plugin for Embulk
 test_files: