embulk-output-bigquery 0.7.0 → 0.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/check.yml +12 -21
- data/CHANGELOG.md +11 -0
- data/Gemfile +1 -1
- data/README.md +2 -0
- data/embulk-output-bigquery.gemspec +3 -2
- data/lib/embulk/output/bigquery/bigquery_client.rb +4 -3
- data/lib/embulk/output/bigquery/gcs_client.rb +3 -3
- data/lib/embulk/output/bigquery/helper.rb +5 -4
- data/lib/embulk/output/bigquery.rb +2 -0
- data/test/test_helper.rb +2 -2
- metadata +21 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3f1f3663f59942ba67caf8d3aa2785df824aaef5cc09de0bdcbadadd3642d81
|
4
|
+
data.tar.gz: 389b3111be5db9924f67fd2ce3d5d2c0afbe46d12c44c7df6ee5c95e04269c79
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5136502bb9003aa85f85c9c5d3b5c826f218476a7604f7d201160bf4931936f8f02b00fb77ea286b9ae8fc69b007b122a1f7f0707cec4a7b7d030d1d1771ce86
|
7
|
+
data.tar.gz: c16e57af1121e30e0bcfa187311d9f7652768a2bda79354202ce7bac92e9e5bf3bc33cdb2fbb95696f03653fa830f844f95bd01e200cb0b7d9732f2a3b989bad
|
data/.github/workflows/check.yml
CHANGED
@@ -9,34 +9,25 @@ jobs:
|
|
9
9
|
strategy:
|
10
10
|
matrix:
|
11
11
|
jruby_version:
|
12
|
-
- 9.3.
|
13
|
-
- 9.4.
|
12
|
+
- 9.3.15.0
|
13
|
+
- 9.4.8.0
|
14
14
|
fail-fast: false
|
15
15
|
steps:
|
16
|
-
- uses: actions/checkout@
|
16
|
+
- uses: actions/checkout@v4
|
17
17
|
- name: Set up OpenJDK 8
|
18
|
-
uses: actions/setup-java@
|
18
|
+
uses: actions/setup-java@v4
|
19
19
|
with:
|
20
20
|
java-version: 8
|
21
21
|
distribution: "temurin"
|
22
|
-
-
|
23
|
-
|
24
|
-
ruby-version: 'jruby-${{ matrix.jruby_version }}'
|
25
|
-
bundler-cache: true
|
26
|
-
- name: show ruby version
|
27
|
-
run: ruby -v
|
28
|
-
- name: bundle install
|
29
|
-
run: bundle install
|
22
|
+
- name: download jruby
|
23
|
+
run: "curl -L -o jruby.jar https://repo1.maven.org/maven2/org/jruby/jruby-complete/${{ matrix.jruby_version }}/jruby-complete-${{ matrix.jruby_version }}.jar"
|
30
24
|
#
|
31
|
-
#
|
25
|
+
# For avoiding permission denied. install gems into `gems` directory
|
32
26
|
#
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
#
|
37
|
-
- name: install rake 13.1.0
|
38
|
-
run: gem install rake -v 13.1.0
|
27
|
+
- name: bundle install
|
28
|
+
run: "env GEM_HOME=gems java -jar jruby.jar -S bundle install"
|
29
|
+
|
39
30
|
- name: install embulk.jar
|
40
|
-
run: "curl -L -o embulk.jar https://github.com/embulk/embulk/releases/download/v0.
|
31
|
+
run: "curl -L -o embulk.jar https://github.com/embulk/embulk/releases/download/v0.11.4/embulk-0.11.4.jar"
|
41
32
|
- name: rake test
|
42
|
-
run:
|
33
|
+
run: 'env GEM_HOME=gems RUBYOPT="-r ./embulk.jar -r rubygems" java -jar jruby.jar -S bundle exec rake test'
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
## 0.7.2 - 2024-07-21
|
2
|
+
* [maintenance] Fix GitHub Actions #166
|
3
|
+
* [maintenance] Fix gcs_client in order to load data using gcs_bucket parameter (Thanks to kashira202111) #164
|
4
|
+
* [maintenance] Prevent creating unnecessary tables. (Thanks to kashira202111) #148
|
5
|
+
|
6
|
+
## 0.7.1 - 2024-03-4
|
7
|
+
* [enhancement] Support description of columns and tables (Thanks to @kyoshidajp and @fagai ) #142
|
8
|
+
* [maintenance] Add missing GitHub Actions environment setting. #160
|
9
|
+
* [maintenance] Replace google-api-client with specific Google APIs (Thanks to @Nozomuts) #161
|
10
|
+
* [maintenance] Update GitHub Actions use checkout@v4 and setup-java@v4 #162
|
11
|
+
|
1
12
|
## 0.7.0 - 2024-02-1
|
2
13
|
* [enhancement] Add support Embulk 0.11.x
|
3
14
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -60,6 +60,7 @@ OAuth flow for installed applications.
|
|
60
60
|
| gcs_bucket | string | optional | nil | See [GCS Bucket](#gcs-bucket) |
|
61
61
|
| auto_create_gcs_bucket | boolean | optional | false | See [GCS Bucket](#gcs-bucket) |
|
62
62
|
| progress_log_interval | float | optional | nil (Disabled) | Progress log interval. The progress log is disabled by nil (default). NOTE: This option may be removed in the future because a filter plugin can achieve the same goal |
|
63
|
+
| description | string | optional | nil | description of table |
|
63
64
|
|
64
65
|
Client or request options
|
65
66
|
|
@@ -329,6 +330,7 @@ Column options are used to aid guessing BigQuery schema, or to define conversion
|
|
329
330
|
- json: `STRING`, `RECORD` (default: `STRING`)
|
330
331
|
- **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
|
331
332
|
- **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
|
333
|
+
- **description**: description of the column (string, default is `nil`).
|
332
334
|
- **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
|
333
335
|
- **timezone**: timezone to convert into/from `timestamp`, `date` (string, default is `default_timezone`).
|
334
336
|
- **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
|
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-bigquery"
|
3
|
-
spec.version = "0.7.
|
3
|
+
spec.version = "0.7.2"
|
4
4
|
spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
|
5
5
|
spec.summary = "Google BigQuery output plugin for Embulk"
|
6
6
|
spec.description = "Embulk plugin that insert records to Google BigQuery."
|
@@ -15,7 +15,8 @@ Gem::Specification.new do |spec|
|
|
15
15
|
spec.require_paths = ["lib"]
|
16
16
|
|
17
17
|
# the latest version
|
18
|
-
spec.add_dependency 'google-
|
18
|
+
spec.add_dependency 'google-apis-storage_v1'
|
19
|
+
spec.add_dependency 'google-apis-bigquery_v2'
|
19
20
|
spec.add_dependency 'time_with_zone'
|
20
21
|
spec.add_dependency 'thwait'
|
21
22
|
# activesupport require Ruby >= 2.7.0
|
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -420,6 +420,7 @@ module Embulk
|
|
420
420
|
table_reference: {
|
421
421
|
table_id: table,
|
422
422
|
},
|
423
|
+
description: @task['description'],
|
423
424
|
schema: {
|
424
425
|
fields: fields,
|
425
426
|
}
|
@@ -446,8 +447,8 @@ module Embulk
|
|
446
447
|
end
|
447
448
|
|
448
449
|
opts = {}
|
449
|
-
Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@
|
450
|
-
with_network_retry { client.insert_table(@
|
450
|
+
Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@destination_project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
|
451
|
+
with_network_retry { client.insert_table(@destination_project, dataset, body, **opts) }
|
451
452
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
452
453
|
if e.status_code == 409 && /Already Exists:/ =~ e.message
|
453
454
|
# ignore 'Already Exists' error
|
@@ -456,7 +457,7 @@ module Embulk
|
|
456
457
|
|
457
458
|
response = {status_code: e.status_code, message: e.message, error_class: e.class}
|
458
459
|
Embulk.logger.error {
|
459
|
-
"embulk-output-bigquery: insert_table(#{@
|
460
|
+
"embulk-output-bigquery: insert_table(#{@destination_project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
|
460
461
|
}
|
461
462
|
raise Error, "failed to create table #{@destination_project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
|
462
463
|
end
|
data/lib/embulk/output/bigquery/gcs_client.rb
CHANGED
@@ -48,7 +48,7 @@ module Embulk
|
|
48
48
|
opts = {}
|
49
49
|
|
50
50
|
Embulk.logger.debug { "embulk-output-bigquery: insert_temporary_bucket(#{@project}, #{body}, #{opts})" }
|
51
|
-
with_network_retry { client.insert_bucket(@project, body, opts) }
|
51
|
+
with_network_retry { client.insert_bucket(@project, body, **opts) }
|
52
52
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
53
53
|
if e.status_code == 409 && /conflict:/ =~ e.message
|
54
54
|
# ignore 'Already Exists' error
|
@@ -81,7 +81,7 @@ module Embulk
|
|
81
81
|
|
82
82
|
Embulk.logger.debug { "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts})" }
|
83
83
|
# memo: gcs is strongly consistent for insert (read-after-write). ref: https://cloud.google.com/storage/docs/consistency
|
84
|
-
with_network_retry { client.insert_object(bucket, body, opts) }
|
84
|
+
with_network_retry { client.insert_object(bucket, body, **opts) }
|
85
85
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
86
86
|
response = {status_code: e.status_code, message: e.message, error_class: e.class}
|
87
87
|
Embulk.logger.error {
|
@@ -114,7 +114,7 @@ module Embulk
|
|
114
114
|
opts = {}
|
115
115
|
|
116
116
|
Embulk.logger.debug { "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts})" }
|
117
|
-
response = with_network_retry { client.delete_object(bucket, object, opts) }
|
117
|
+
response = with_network_retry { client.delete_object(bucket, object, **opts) }
|
118
118
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
119
119
|
if e.status_code == 404 # ignore 'notFound' error
|
120
120
|
return nil
|
data/lib/embulk/output/bigquery/helper.rb
CHANGED
@@ -46,10 +46,11 @@ module Embulk
|
|
46
46
|
embulk_type = column[:type]
|
47
47
|
column_option = column_options_map[column_name] || {}
|
48
48
|
{}.tap do |field|
|
49
|
-
field[:name]
|
50
|
-
field[:type]
|
51
|
-
field[:mode]
|
52
|
-
field[:fields]
|
49
|
+
field[:name] = column_name
|
50
|
+
field[:type] = (column_option['type'] || bq_type_from_embulk_type(embulk_type)).upcase
|
51
|
+
field[:mode] = column_option['mode'] if column_option['mode']
|
52
|
+
field[:fields] = deep_symbolize_keys(column_option['fields']) if column_option['fields']
|
53
|
+
field[:description] = column_option['description'] if column_option['description']
|
53
54
|
end
|
54
55
|
end
|
55
56
|
end
|
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -63,6 +63,8 @@ module Embulk
|
|
63
63
|
'payload_column' => config.param('payload_column', :string, :default => nil),
|
64
64
|
'payload_column_index' => config.param('payload_column_index', :integer, :default => nil),
|
65
65
|
|
66
|
+
'description' => config.param('description', :string, :default => nil),
|
67
|
+
|
66
68
|
'open_timeout_sec' => config.param('open_timeout_sec', :integer, :default => nil),
|
67
69
|
'timeout_sec' => config.param('timeout_sec', :integer, :default => nil), # google-api-ruby-client < v0.11.0
|
68
70
|
'send_timeout_sec' => config.param('send_timeout_sec', :integer, :default => nil), # google-api-ruby-client >= v0.11.0
|
data/test/test_helper.rb
CHANGED
@@ -68,7 +68,7 @@ module Embulk
|
|
68
68
|
])
|
69
69
|
task = {
|
70
70
|
'column_options' => [
|
71
|
-
{'name' => 'boolean', 'type' => 'STRING', 'mode' => 'REQUIRED'},
|
71
|
+
{'name' => 'boolean', 'type' => 'STRING', 'mode' => 'REQUIRED', 'description' => 'hoge'},
|
72
72
|
{'name' => 'long', 'type' => 'STRING'},
|
73
73
|
{'name' => 'double', 'type' => 'STRING'},
|
74
74
|
{'name' => 'string', 'type' => 'INTEGER'},
|
@@ -81,7 +81,7 @@ module Embulk
|
|
81
81
|
],
|
82
82
|
}
|
83
83
|
expected = [
|
84
|
-
{name: 'boolean',
|
84
|
+
{name: 'boolean', type: 'STRING', mode: 'REQUIRED', description: 'hoge'},
|
85
85
|
{name: 'long', type: 'STRING'},
|
86
86
|
{name: 'double', type: 'STRING'},
|
87
87
|
{name: 'string', type: 'INTEGER'},
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
@@ -9,22 +9,36 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2024-
|
12
|
+
date: 2024-07-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name: google-
|
15
|
+
name: google-apis-storage_v1
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- -
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: 0
|
20
|
+
version: '0'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: google-apis-bigquery_v2
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
26
33
|
- !ruby/object:Gem::Version
|
27
|
-
version: 0
|
34
|
+
version: '0'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
28
42
|
- !ruby/object:Gem::Dependency
|
29
43
|
name: time_with_zone
|
30
44
|
requirement: !ruby/object:Gem::Requirement
|