embulk-output-bigquery 0.6.1 → 0.6.6
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/Gemfile +1 -1
- data/README.md +5 -4
- data/embulk-output-bigquery.gemspec +12 -2
- data/lib/embulk/output/bigquery.rb +4 -2
- data/lib/embulk/output/bigquery/bigquery_client.rb +30 -29
- data/lib/embulk/output/bigquery/gcs_client.rb +7 -6
- data/lib/embulk/output/bigquery/google_client.rb +3 -3
- data/lib/embulk/output/bigquery/value_converter_factory.rb +31 -0
- data/test/test_bigquery_client.rb +1 -0
- data/test/test_configure.rb +11 -0
- data/test/test_helper.rb +7 -1
- data/test/test_value_converter_factory.rb +86 -0
- metadata +70 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d48b65d07302466f8f52dadb559ad049a054453db3741d4384209125e7b9e9cd
+  data.tar.gz: 13cd70568cfaebba819a9b7a9a51d1c45ff9f1599893b4a0b451e82dc84e40c9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ee51c9bf570ce2f2a55e43a5ab7842f1669d814b93cdd1395853be8f6b3ff770f6a5a6c9f9a81b6c0eca1e9bc72aff3d1302d37760fe559ecfa33a740e1da724
+  data.tar.gz: 8ada113513a089d786bf93bce1de98ad4bcc900ff73931c164a801b29e0bad9fd4b001bdf85962570998df995209e4ff320ba74e1fae22b61fb389a621121073
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,25 @@
+## 0.6.6 - 2021-06-10
+
+* [maintenance] Fix network retry function (thanks to @case-k-git)
+* [enhancement] Allow to specify the billing project and the project to which the data will be loaded separately (thanks to @ck-fm0211)
+* [enhancement] Include original error message on json parse error (thanks to @k-yomo)
+
+## 0.6.5 - 2021-06-10
+* [maintenance] Fix failed tests (thanks to @kyoshidajp)
+* [maintenance] Lock representable version for avoiding requiring Ruby 2.4 (thanks to @hiroyuki-sato)
+
+## 0.6.4 - 2019-11-06
+
+* [enhancement] Add DATETIME type conveter (thanks to @kekekenta)
+
+## 0.6.3 - 2019-10-28
+
+* [enhancement] Add DATE type conveter (thanks to @tksfjt1024)
+
+## 0.6.2 - 2019-10-16
+
+* [maintenance] Lock signet and google-api-client version (thanks to @hiroyuki-sato)
+
 ## 0.6.1 - 2019-08-28
 
 * [maintenance] Release a new gem not to include symlinks to make it work on Windows.
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -33,6 +33,7 @@ OAuth flow for installed applications.
 | auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
 | json_keyfile | string | optional | | keyfile path or `content` |
 | project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
+| destination_project | string | optional | `project` value | A destination project to which the data will be loaded. Use this if you want to separate a billing project (the `project` value) and a destination project (the `destination_project` value). |
 | dataset | string | required | | dataset |
 | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
@@ -307,17 +308,17 @@ Column options are used to aid guessing BigQuery schema, or to define conversion

 - **column_options**: advanced: an array of options for columns
   - **name**: column name
-  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, and `RECORD`. See belows for supported conversion type.
+  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, and `RECORD`. See belows for supported conversion type.
     - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
     - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
     - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
-    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `RECORD` (default: `STRING`)
-    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `TIMESTAMP`)
+    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, `RECORD` (default: `STRING`)
+    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE` (default: `TIMESTAMP`)
     - json: `STRING`, `RECORD` (default: `STRING`)
   - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
   - **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
   - **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
-  - **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`).
+  - **timezone**: timezone to convert into/from `timestamp`, `date` (string, default is `default_timezone`).
 - **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
 - **default_timezone**: default timezone for column_options (string, default is "UTC")

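As a quick illustration of the new types, a column_options entry for string columns could look like the following, a minimal sketch in the Ruby hash form the plugin's tests use (the column names and the %Y/%m/%d format are placeholders):

    'column_options' => [
      {'name' => 'date_col',     'type' => 'DATE'},
      {'name' => 'datetime_col', 'type' => 'DATETIME', 'timestamp_format' => '%Y/%m/%d'},
    ]

When timestamp_format is omitted for a DATETIME column, string values are passed through unchanged and must already be in a BigQuery-accepted format (see the value_converter_factory.rb hunk further down).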
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.6.1"
+  spec.version = "0.6.6"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -14,8 +14,18 @@ Gem::Specification.new do |spec|
   spec.test_files = spec.files.grep(%r{^(test|spec)/})
   spec.require_paths = ["lib"]

-  spec.add_dependency 'google-api-client'
+  # TODO
+  # signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
+  # Embulk 0.9 use JRuby 9.1.X.Y and it's compatible with Ruby 2.3.
+  # So, force install signet < 0.12 and google-api-client < 0.33.0
+  # Also, representable version >= 3.1.0 requires Ruby version >= 2.4
+  spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
+  spec.add_dependency 'google-api-client','< 0.33.0'
   spec.add_dependency 'time_with_zone'
+  spec.add_dependency "representable", ['~> 3.0.0', '< 3.1']
+  # faraday 1.1.0 require >= Ruby 2.4.
+  # googleauth 0.9.0 requires faraday ~> 0.12
+  spec.add_dependency "faraday", '~> 0.12'

   spec.add_development_dependency 'bundler', ['>= 1.10.6']
   spec.add_development_dependency 'rake', ['>= 10.0']
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -36,6 +36,7 @@ module Embulk
          'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
          'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
          'project' => config.param('project', :string, :default => nil),
+         'destination_project' => config.param('destination_project', :string, :default => nil),
          'dataset' => config.param('dataset', :string),
          'location' => config.param('location', :string, :default => nil),
          'table' => config.param('table', :string),
@@ -135,12 +136,13 @@ module Embulk
            json_key = JSON.parse(task['json_keyfile'])
            task['project'] ||= json_key['project_id']
          rescue => e
-           raise ConfigError.new "json_keyfile is not a JSON file"
+           raise ConfigError.new "Parsing 'json_keyfile' failed with error: #{e.class} #{e.message}"
          end
        end
        if task['project'].nil?
          raise ConfigError.new "Required field \"project\" is not set"
        end
+       task['destination_project'] ||= task['project']

        if (task['payload_column'] or task['payload_column_index']) and task['auto_create_table']
          if task['schema_file'].nil? and task['template_table'].nil?
@@ -166,7 +168,7 @@ module Embulk
          begin
            JSON.parse(File.read(task['schema_file']))
          rescue => e
-           raise ConfigError.new "schema_file #{task['schema_file']} is not a JSON file"
+           raise ConfigError.new "Parsing 'schema_file' #{task['schema_file']} failed with error: #{e.class} #{e.message}"
          end
        end

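The net effect of the configure changes is that destination_project is optional and simply falls back to the billing project. A minimal sketch of that fallback (hypothetical project name, not the plugin's full configure logic):

    task = {'project' => 'my-billing-project', 'destination_project' => nil}
    task['destination_project'] ||= task['project']
    task['destination_project']  #=> "my-billing-project"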
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -18,6 +18,7 @@ module Embulk
        @schema = schema
        reset_fields(fields) if fields
        @project = @task['project']
+       @destination_project = @task['destination_project']
        @dataset = @task['dataset']
        @location = @task['location']
        @location_for_log = @location.nil? ? 'us/eu' : @location
@@ -80,7 +81,7 @@ module Embulk
        # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
        # we should generate job_id in client code, otherwise, retrying would cause duplication
        job_id = "embulk_load_job_#{SecureRandom.uuid}"
-       Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
+       Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }

        body = {
          job_reference: {
@@ -90,7 +91,7 @@ module Embulk
          configuration: {
            load: {
              destination_table: {
-               project_id: @project,
+               project_id: @destination_project,
                dataset_id: @dataset,
                table_id: table,
              },
@@ -130,7 +131,7 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
+         raise Error, "failed to load #{object_uris} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
        end
      end
    end
@@ -171,7 +172,7 @@ module Embulk
          # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
          # we should generate job_id in client code, otherwise, retrying would cause duplication
          job_id = "embulk_load_job_#{SecureRandom.uuid}"
-         Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
+         Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }
        else
          Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
          return
@@ -185,7 +186,7 @@ module Embulk
          configuration: {
            load: {
              destination_table: {
-               project_id: @project,
+               project_id: @destination_project,
                dataset_id: @dataset,
                table_id: table,
              },
@@ -232,7 +233,7 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
+         raise Error, "failed to load #{path} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
        end
      end
    end
@@ -245,7 +246,7 @@ module Embulk

        Embulk.logger.info {
          "embulk-output-bigquery: Copy job starting... job_id:[#{job_id}] " \
-         "#{@project}:#{@dataset}.#{source_table} => #{@project}:#{destination_dataset}.#{destination_table}"
+         "#{@destination_project}:#{@dataset}.#{source_table} => #{@destination_project}:#{destination_dataset}.#{destination_table}"
        }

        body = {
@@ -258,12 +259,12 @@ module Embulk
            create_deposition: 'CREATE_IF_NEEDED',
            write_disposition: write_disposition,
            source_table: {
-             project_id: @project,
+             project_id: @destination_project,
              dataset_id: @dataset,
              table_id: source_table,
            },
            destination_table: {
-             project_id: @project,
+             project_id: @destination_project,
              dataset_id: destination_dataset,
              table_id: destination_table,
            },
@@ -284,8 +285,8 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to copy #{@project}:#{@dataset}.#{source_table} " \
-           "to #{@project}:#{destination_dataset}.#{destination_table}, response:#{response}"
+         raise Error, "failed to copy #{@destination_project}:#{@dataset}.#{source_table} " \
+           "to #{@destination_project}:#{destination_dataset}.#{destination_table}, response:#{response}"
        end
      end
    end
@@ -354,7 +355,7 @@ module Embulk
      def create_dataset(dataset = nil, reference: nil)
        dataset ||= @dataset
        begin
-         Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset} in #{@location_for_log}" }
+         Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@destination_project}:#{dataset} in #{@location_for_log}" }
          hint = {}
          if reference
            response = get_dataset(reference)
@@ -382,25 +383,25 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_dataset(#{@project}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to create dataset #{@project}:#{dataset} in #{@location_for_log}, response:#{response}"
+         raise Error, "failed to create dataset #{@destination_project}:#{dataset} in #{@location_for_log}, response:#{response}"
        end
      end

      def get_dataset(dataset = nil)
        dataset ||= @dataset
        begin
-         Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{dataset}" }
-         with_network_retry { client.get_dataset(@project, dataset) }
+         Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@destination_project}:#{dataset}" }
+         with_network_retry { client.get_dataset(@destination_project, dataset) }
        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
          if e.status_code == 404
-           raise NotFoundError, "Dataset #{@project}:#{dataset} is not found"
+           raise NotFoundError, "Dataset #{@destination_project}:#{dataset} is not found"
          end

          response = {status_code: e.status_code, message: e.message, error_class: e.class}
          Embulk.logger.error {
-           "embulk-output-bigquery: get_dataset(#{@project}, #{dataset}), response:#{response}"
+           "embulk-output-bigquery: get_dataset(#{@destination_project}, #{dataset}), response:#{response}"
          }
-         raise Error, "failed to get dataset #{@project}:#{dataset}, response:#{response}"
+         raise Error, "failed to get dataset #{@destination_project}:#{dataset}, response:#{response}"
        end
      end

@@ -414,7 +415,7 @@ module Embulk
          table = Helper.chomp_partition_decorator(table)
        end

-       Embulk.logger.info { "embulk-output-bigquery: Create table... #{@project}:#{dataset}.#{table}" }
+       Embulk.logger.info { "embulk-output-bigquery: Create table... #{@destination_project}:#{dataset}.#{table}" }
        body = {
          table_reference: {
            table_id: table,
@@ -452,7 +453,7 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to create table #{@project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
+         raise Error, "failed to create table #{@destination_project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
        end
      end

@@ -469,8 +470,8 @@ module Embulk
      def delete_table_or_partition(table, dataset: nil)
        begin
          dataset ||= @dataset
-         Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@project}:#{dataset}.#{table}" }
-         with_network_retry { client.delete_table(@project, dataset, table) }
+         Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@destination_project}:#{dataset}.#{table}" }
+         with_network_retry { client.delete_table(@destination_project, dataset, table) }
        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
          if e.status_code == 404 && /Not found:/ =~ e.message
            # ignore 'Not Found' error
@@ -479,9 +480,9 @@ module Embulk

          response = {status_code: e.status_code, message: e.message, error_class: e.class}
          Embulk.logger.error {
-           "embulk-output-bigquery: delete_table(#{@project}, #{dataset}, #{table}), response:#{response}"
+           "embulk-output-bigquery: delete_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
          }
-         raise Error, "failed to delete table #{@project}:#{dataset}.#{table}, response:#{response}"
+         raise Error, "failed to delete table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
        end
      end

@@ -497,18 +498,18 @@ module Embulk
      def get_table_or_partition(table, dataset: nil)
        begin
          dataset ||= @dataset
-         Embulk.logger.info { "embulk-output-bigquery: Get table... #{@project}:#{dataset}.#{table}" }
-         with_network_retry { client.get_table(@project, dataset, table) }
+         Embulk.logger.info { "embulk-output-bigquery: Get table... #{@destination_project}:#{dataset}.#{table}" }
+         with_network_retry { client.get_table(@destination_project, dataset, table) }
        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
          if e.status_code == 404
-           raise NotFoundError, "Table #{@project}:#{dataset}.#{table} is not found"
+           raise NotFoundError, "Table #{@destination_project}:#{dataset}.#{table} is not found"
          end

          response = {status_code: e.status_code, message: e.message, error_class: e.class}
          Embulk.logger.error {
-           "embulk-output-bigquery: get_table(#{@project}, #{dataset}, #{table}), response:#{response}"
+           "embulk-output-bigquery: get_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
          }
-         raise Error, "failed to get table #{@project}:#{dataset}.#{table}, response:#{response}"
+         raise Error, "failed to get table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
        end
      end
    end
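Across these hunks the pattern is consistent: jobs are still inserted under the billing @project (the insert_job(#{@project}, ...) log lines are unchanged), while dataset and table references now point at @destination_project. A compressed, illustrative sketch of the resulting load-job call (field values are placeholders, not the plugin's exact code):

    body = {
      configuration: {
        load: {
          destination_table: {
            project_id: destination_project,  # project that receives the data
            dataset_id: dataset,
            table_id: table,
          },
        },
      },
    }
    # the first argument is the project the job is billed to
    with_network_retry { client.insert_job(project, body, opts) }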
data/lib/embulk/output/bigquery/gcs_client.rb
CHANGED
@@ -16,6 +16,7 @@ module Embulk
        super(task, scope, client_class)

        @project = @task['project']
+       @destination_project = @task['destination_project']
        @bucket = @task['gcs_bucket']
        @location = @task['location']
      end
@@ -23,7 +24,7 @@ module Embulk
      def insert_temporary_bucket(bucket = nil)
        bucket ||= @bucket
        begin
-         Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@project}:#{bucket}" }
+         Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@destination_project}:#{bucket}" }
          body = {
            name: bucket,
            lifecycle: {
@@ -57,7 +58,7 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_temporary_bucket(#{@project}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to insert bucket #{@project}:#{bucket}, response:#{response}"
+         raise Error, "failed to insert bucket #{@destination_project}:#{bucket}, response:#{response}"
        end
      end

@@ -69,7 +70,7 @@ module Embulk

        started = Time.now
        begin
-         Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@project}:#{object_uri}" }
+         Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@destination_project}:#{object_uri}" }
          body = {
            name: object,
          }
@@ -86,7 +87,7 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to insert object #{@project}:#{object_uri}, response:#{response}"
+         raise Error, "failed to insert object #{@destination_project}:#{object_uri}, response:#{response}"
        end
      end

@@ -109,7 +110,7 @@ module Embulk
        object = object.start_with?('/') ? object[1..-1] : object
        object_uri = URI.join("gs://#{bucket}", object).to_s
        begin
-         Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@project}:#{object_uri}" }
+         Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@destination_project}:#{object_uri}" }
          opts = {}

          Embulk.logger.debug { "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts})" }
@@ -122,7 +123,7 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to delete object #{@project}:#{object_uri}, response:#{response}"
+         raise Error, "failed to delete object #{@destination_project}:#{object_uri}, response:#{response}"
        end
      end
    end
data/lib/embulk/output/bigquery/google_client.rb
CHANGED
@@ -50,7 +50,9 @@ module Embulk
        begin
          yield
        rescue ::Java::Java.net.SocketException, ::Java::Java.net.ConnectException => e
-         if ['Broken pipe', 'Connection reset', 'Connection timed out'].include?(e.message)
+         if ['Broken pipe', 'Connection reset', 'Connection timed out'].select { |x| e.message.include?(x) }.empty?
+           raise e
+         else
            if retries < @task['retries']
              retries += 1
              Embulk.logger.warn { "embulk-output-bigquery: retry \##{retries}, #{e.class} #{e.message}" }
@@ -59,8 +61,6 @@ module Embulk
              Embulk.logger.error { "embulk-output-bigquery: retry exhausted \##{retries}, #{e.class} #{e.message}" }
              raise e
            end
-         else
-           raise e
          end
        end
      end
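The retry condition matters because JRuby socket exceptions typically carry extra detail beyond these bare strings, so the previous membership test (reconstructed above from the truncated listing) never matched and retryable errors were re-raised immediately. A standalone illustration with a made-up message:

    retryable = ['Broken pipe', 'Connection reset', 'Connection timed out']
    message   = 'Broken pipe (Write failed)'

    retryable.include?(message)                          #=> false; the old check skipped the retry
    retryable.select { |s| message.include?(s) }.empty?  #=> false; the new check falls into the retry branch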
data/lib/embulk/output/bigquery/value_converter_factory.rb
CHANGED
@@ -203,6 +203,27 @@ module Embulk
            val # Users must care of BQ timestamp format
          }
        end
+     when 'DATE'
+       Proc.new {|val|
+         next nil if val.nil?
+         with_typecast_error(val) do |val|
+           TimeWithZone.set_zone_offset(Time.parse(val), zone_offset).strftime("%Y-%m-%d")
+         end
+       }
+     when 'DATETIME'
+       if @timestamp_format
+         Proc.new {|val|
+           next nil if val.nil?
+           with_typecast_error(val) do |val|
+             Time.strptime(val, @timestamp_format).strftime("%Y-%m-%d %H:%M:%S.%6N")
+           end
+         }
+       else
+         Proc.new {|val|
+           next nil if val.nil?
+           val # Users must care of BQ timestamp format
+         }
+       end
      when 'RECORD'
        Proc.new {|val|
          next nil if val.nil?
@@ -240,6 +261,16 @@ module Embulk
          next nil if val.nil?
          val.strftime("%Y-%m-%d %H:%M:%S.%6N %:z")
        }
+     when 'DATE'
+       Proc.new {|val|
+         next nil if val.nil?
+         val.localtime(zone_offset).strftime("%Y-%m-%d")
+       }
+     when 'DATETIME'
+       Proc.new {|val|
+         next nil if val.nil?
+         val.localtime(zone_offset).strftime("%Y-%m-%d %H:%M:%S.%6N")
+       }
      else
        raise NotSupportedType, "cannot take column type #{type} for timestamp column"
      end
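In short, string values become DATE by parsing and keeping only the date part, and become DATETIME by reparsing with timestamp_format when one is given. A stripped-down sketch of the two conversions outside the plugin (TimeWithZone/zone_offset handling omitted):

    require 'time'

    # string -> DATE: parse, keep the date part
    to_date = Proc.new {|val| val.nil? ? nil : Time.parse(val).strftime("%Y-%m-%d") }
    to_date.call("2016-02-26 00:00:00")   #=> "2016-02-26"

    # string -> DATETIME with an explicit timestamp_format
    to_datetime = Proc.new {|val| val.nil? ? nil : Time.strptime(val, "%Y/%m/%d").strftime("%Y-%m-%d %H:%M:%S.%6N") }
    to_datetime.call("2016/02/26")        #=> "2016-02-26 00:00:00.000000"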
data/test/test_bigquery_client.rb
CHANGED
@@ -29,6 +29,7 @@ else
    def least_task
      {
        'project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
+       'destination_project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
        'dataset' => 'your_dataset_name',
        'table' => 'your_table_name',
        'auth_method' => 'json_key',
data/test/test_configure.rb
CHANGED
@@ -45,6 +45,7 @@ module Embulk
      assert_equal "application_default", task['auth_method']
      assert_equal nil, task['json_keyfile']
      assert_equal "your_project_name", task['project']
+     assert_equal "your_project_name", task['destination_project']
      assert_equal "your_dataset_name", task['dataset']
      assert_equal nil, task['location']
      assert_equal "your_table_name", task['table']
@@ -284,6 +285,16 @@ module Embulk
      config = least_config.merge('schema_update_options' => ['FOO'])
      assert_raise { Bigquery.configure(config, schema, processor_count) }
    end
+
+   def test_destination_project
+     config = least_config.merge('destination_project' => 'your_destination_project_name')
+     task = Bigquery.configure(config, schema, processor_count)
+
+     assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+     assert_equal 'your_destination_project_name', task['destination_project']
+     assert_equal 'your_project_name', task['project']
+   end
+
  end
 end
 end
data/test/test_helper.rb
CHANGED
@@ -62,7 +62,9 @@ module Embulk
        Column.new({index: 2, name: 'double', type: :double}),
        Column.new({index: 3, name: 'string', type: :string}),
        Column.new({index: 4, name: 'timestamp', type: :timestamp}),
-       Column.new({index: 5, name: 'json', type: :json}),
+       Column.new({index: 5, name: 'date', type: :timestamp}),
+       Column.new({index: 6, name: 'datetime', type: :timestamp}),
+       Column.new({index: 7, name: 'json', type: :json}),
      ])
      task = {
        'column_options' => [
@@ -71,6 +73,8 @@ module Embulk
          {'name' => 'double', 'type' => 'STRING'},
          {'name' => 'string', 'type' => 'INTEGER'},
          {'name' => 'timestamp', 'type' => 'INTEGER'},
+         {'name' => 'date', 'type' => 'DATE'},
+         {'name' => 'datetime', 'type' => 'DATETIME'},
          {'name' => 'json', 'type' => 'RECORD', 'fields' => [
            { 'name' => 'key1', 'type' => 'STRING' },
          ]},
@@ -82,6 +86,8 @@ module Embulk
          {name: 'double', type: 'STRING'},
          {name: 'string', type: 'INTEGER'},
          {name: 'timestamp', type: 'INTEGER'},
+         {name: 'date', type: 'DATE'},
+         {name: 'datetime', type: 'DATETIME'},
          {name: 'json', type: 'RECORD', fields: [
            {name: 'key1', type: 'STRING'},
          ]},
data/test/test_value_converter_factory.rb
CHANGED
@@ -90,6 +90,14 @@ module Embulk
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'TIMESTAMP').create_converter }
      end

+     def test_date
+       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
+     end
+
+     def test_datetime
+       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+     end
+
      def test_record
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
      end
@@ -130,6 +138,14 @@ module Embulk
        assert_equal 1408452095, converter.call(1408452095)
      end

+     def test_date
+       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
+     end
+
+     def test_datetime
+       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+     end
+
      def test_record
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
      end
@@ -166,6 +182,14 @@ module Embulk
        assert_equal 1408452095.188766, converter.call(1408452095.188766)
      end

+     def test_date
+       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
+     end
+
+     def test_datetime
+       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+     end
+
      def test_record
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
      end
@@ -216,6 +240,28 @@ module Embulk
        assert_equal "2016-02-26 00:00:00", converter.call("2016-02-26 00:00:00")
      end

+     def test_date
+       converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter
+       assert_equal nil, converter.call(nil)
+       assert_equal "2016-02-26", converter.call("2016-02-26")
+       assert_equal "2016-02-26", converter.call("2016-02-26 00:00:00")
+       assert_raise { converter.call('foo') }
+     end
+
+     def test_datetime
+       converter = ValueConverterFactory.new(
+         SCHEMA_TYPE, 'DATETIME',
+         timestamp_format: '%Y/%m/%d'
+       ).create_converter
+       assert_equal nil, converter.call(nil)
+       assert_equal "2016-02-26 00:00:00.000000", converter.call("2016/02/26")
+
+       # Users must care of BQ datetime format by themselves with no timestamp_format
+       converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
+       assert_equal nil, converter.call(nil)
+       assert_equal "2016-02-26 00:00:00", converter.call("2016-02-26 00:00:00")
+     end
+
      def test_record
        converter = ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter
        assert_equal({'foo'=>'foo'}, converter.call(%Q[{"foo":"foo"}]))
@@ -268,6 +314,42 @@ module Embulk
        assert_equal expected, converter.call(Time.at(subject).utc)
      end

+     def test_date
+       converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter
+       assert_equal nil, converter.call(nil)
+       timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
+       expected = "2016-02-26"
+       assert_equal expected, converter.call(timestamp)
+
+       converter = ValueConverterFactory.new(
+         SCHEMA_TYPE, 'DATE', timezone: 'Asia/Tokyo'
+       ).create_converter
+       assert_equal nil, converter.call(nil)
+       timestamp = Time.parse("2016-02-25 15:00:00.500000 +00:00")
+       expected = "2016-02-26"
+       assert_equal expected, converter.call(timestamp)
+
+       assert_raise { converter.call('foo') }
+     end
+
+     def test_datetime
+       converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
+       assert_equal nil, converter.call(nil)
+       timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
+       expected = "2016-02-26 00:00:00.500000"
+       assert_equal expected, converter.call(timestamp)
+
+       converter = ValueConverterFactory.new(
+         SCHEMA_TYPE, 'DATETIME', timezone: 'Asia/Tokyo'
+       ).create_converter
+       assert_equal nil, converter.call(nil)
+       timestamp = Time.parse("2016-02-25 15:00:00.500000 +00:00")
+       expected = "2016-02-26 00:00:00.500000"
+       assert_equal expected, converter.call(timestamp)
+
+       assert_raise { converter.call('foo') }
+     end
+
      def test_record
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
      end
@@ -298,6 +380,10 @@ module Embulk
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'TIMESTAMP').create_converter }
      end

+     def test_date
+       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
+     end
+
      def test_record
        converter = ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter
        assert_equal nil, converter.call(nil)
metadata
CHANGED
@@ -1,67 +1,121 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.6.1
+  version: 0.6.6
 platform: ruby
 authors:
 - Satoshi Akama
 - Naotoshi Seo
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2021-06-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
+  name: signet
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
-  name: google-api-client
+        version: '0.7'
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: 0.12.0
+  type: :runtime
   prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.7'
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: 0.12.0
+- !ruby/object:Gem::Dependency
+  name: google-api-client
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: 0.33.0
   type: :runtime
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "<"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: 0.33.0
 - !ruby/object:Gem::Dependency
+  name: time_with_zone
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-  name: time_with_zone
-  prerelease: false
   type: :runtime
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
+  name: representable
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '3.1'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '3.1'
+- !ruby/object:Gem::Dependency
+  name: faraday
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.12'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.12'
+- !ruby/object:Gem::Dependency
+  name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
         version: 1.10.6
-  name: bundler
-  prerelease: false
   type: :development
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
         version: 1.10.6
 - !ruby/object:Gem::Dependency
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
         version: '10.0'
-  name: rake
-  prerelease: false
   type: :development
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -103,7 +157,7 @@ homepage: https://github.com/embulk/embulk-output-bigquery
 licenses:
 - MIT
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -119,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubygems_version: 3.0.3
-signing_key:
+signing_key:
 specification_version: 4
 summary: Google BigQuery output plugin for Embulk
 test_files: