embulk-output-bigquery 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/check.yml +12 -21
- data/CHANGELOG.md +11 -0
- data/Gemfile +1 -1
- data/README.md +2 -0
- data/embulk-output-bigquery.gemspec +3 -2
- data/lib/embulk/output/bigquery/bigquery_client.rb +4 -3
- data/lib/embulk/output/bigquery/gcs_client.rb +3 -3
- data/lib/embulk/output/bigquery/helper.rb +5 -4
- data/lib/embulk/output/bigquery.rb +2 -0
- data/test/test_helper.rb +2 -2
- metadata +21 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3f1f3663f59942ba67caf8d3aa2785df824aaef5cc09de0bdcbadadd3642d81
|
4
|
+
data.tar.gz: 389b3111be5db9924f67fd2ce3d5d2c0afbe46d12c44c7df6ee5c95e04269c79
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5136502bb9003aa85f85c9c5d3b5c826f218476a7604f7d201160bf4931936f8f02b00fb77ea286b9ae8fc69b007b122a1f7f0707cec4a7b7d030d1d1771ce86
|
7
|
+
data.tar.gz: c16e57af1121e30e0bcfa187311d9f7652768a2bda79354202ce7bac92e9e5bf3bc33cdb2fbb95696f03653fa830f844f95bd01e200cb0b7d9732f2a3b989bad
|
data/.github/workflows/check.yml
CHANGED
@@ -9,34 +9,25 @@ jobs:
|
|
9
9
|
strategy:
|
10
10
|
matrix:
|
11
11
|
jruby_version:
|
12
|
-
- 9.3.
|
13
|
-
- 9.4.
|
12
|
+
- 9.3.15.0
|
13
|
+
- 9.4.8.0
|
14
14
|
fail-fast: false
|
15
15
|
steps:
|
16
|
-
- uses: actions/checkout@
|
16
|
+
- uses: actions/checkout@v4
|
17
17
|
- name: Set up OpenJDK 8
|
18
|
-
uses: actions/setup-java@
|
18
|
+
uses: actions/setup-java@v4
|
19
19
|
with:
|
20
20
|
java-version: 8
|
21
21
|
distribution: "temurin"
|
22
|
-
-
|
23
|
-
|
24
|
-
ruby-version: 'jruby-${{ matrix.jruby_version }}'
|
25
|
-
bundler-cache: true
|
26
|
-
- name: show ruby version
|
27
|
-
run: ruby -v
|
28
|
-
- name: bundle install
|
29
|
-
run: bundle install
|
22
|
+
- name: download jruby
|
23
|
+
run: "curl -L -o jruby.jar https://repo1.maven.org/maven2/org/jruby/jruby-complete/${{ matrix.jruby_version }}/jruby-complete-${{ matrix.jruby_version }}.jar"
|
30
24
|
#
|
31
|
-
#
|
25
|
+
# For avoiding permission denied. install gems into `gems` directory
|
32
26
|
#
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
#
|
37
|
-
- name: install rake 13.1.0
|
38
|
-
run: gem install rake -v 13.1.0
|
27
|
+
- name: bundle install
|
28
|
+
run: "env GEM_HOME=gems java -jar jruby.jar -S bundle install"
|
29
|
+
|
39
30
|
- name: install embulk.jar
|
40
|
-
run: "curl -L -o embulk.jar https://github.com/embulk/embulk/releases/download/v0.
|
31
|
+
run: "curl -L -o embulk.jar https://github.com/embulk/embulk/releases/download/v0.11.4/embulk-0.11.4.jar"
|
41
32
|
- name: rake test
|
42
|
-
run:
|
33
|
+
run: 'env GEM_HOME=gems RUBYOPT="-r ./embulk.jar -r rubygems" java -jar jruby.jar -S bundle exec rake test'
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
## 0.7.2 - 2024-07-21
|
2
|
+
* [maintenance] Fix GitHub Actions #166
|
3
|
+
* [maintenance] Fix gcs_client in order to load data using gcs_bucket parameter (Thanks to kashira202111) #164
|
4
|
+
* [maintenance] Prevent creating unnecessary tables. (Thanks to kashira202111) #148
|
5
|
+
|
6
|
+
## 0.7.1 - 2024-03-4
|
7
|
+
* [enhancement] Support description of columns and tables (Thanks to @kyoshidajp and @fagai ) #142
|
8
|
+
* [maintenance] Add missing GitHub Actions environment setting. #160
|
9
|
+
* [maintenance] Replace google-api-client with specific Google APIs (Thanks to @Nozomuts) #161
|
10
|
+
* [maintenance] Update GitHub Actions use checkout@v4 and setup-java@v4 #162
|
11
|
+
|
1
12
|
## 0.7.0 - 2024-02-1
|
2
13
|
* [enhancement] Add support Embulk 0.11.x
|
3
14
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -60,6 +60,7 @@ OAuth flow for installed applications.
|
|
60
60
|
| gcs_bucket | string | optional | nil | See [GCS Bucket](#gcs-bucket) |
|
61
61
|
| auto_create_gcs_bucket | boolean | optional | false | See [GCS Bucket](#gcs-bucket) |
|
62
62
|
| progress_log_interval | float | optional | nil (Disabled) | Progress log interval. The progress log is disabled by nil (default). NOTE: This option may be removed in a future because a filter plugin can achieve the same goal |
|
63
|
+
| description | string | optional | nil | description of table |
|
63
64
|
|
64
65
|
Client or request options
|
65
66
|
|
@@ -329,6 +330,7 @@ Column options are used to aid guessing BigQuery schema, or to define conversion
|
|
329
330
|
- json: `STRING`, `RECORD` (default: `STRING`)
|
330
331
|
- **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
|
331
332
|
- **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
|
333
|
+
- **description**: description (string, default is `None`).
|
332
334
|
- **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
|
333
335
|
- **timezone**: timezone to convert into/from `timestamp`, `date` (string, default is `default_timezone`).
|
334
336
|
- **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
|
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-bigquery"
|
3
|
-
spec.version = "0.7.
|
3
|
+
spec.version = "0.7.2"
|
4
4
|
spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
|
5
5
|
spec.summary = "Google BigQuery output plugin for Embulk"
|
6
6
|
spec.description = "Embulk plugin that insert records to Google BigQuery."
|
@@ -15,7 +15,8 @@ Gem::Specification.new do |spec|
|
|
15
15
|
spec.require_paths = ["lib"]
|
16
16
|
|
17
17
|
# the latest version
|
18
|
-
spec.add_dependency 'google-
|
18
|
+
spec.add_dependency 'google-apis-storage_v1'
|
19
|
+
spec.add_dependency 'google-apis-bigquery_v2'
|
19
20
|
spec.add_dependency 'time_with_zone'
|
20
21
|
spec.add_dependency 'thwait'
|
21
22
|
# activesupport require Ruby >= 2.7.0
|
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -420,6 +420,7 @@ module Embulk
|
|
420
420
|
table_reference: {
|
421
421
|
table_id: table,
|
422
422
|
},
|
423
|
+
description: @task['description'],
|
423
424
|
schema: {
|
424
425
|
fields: fields,
|
425
426
|
}
|
@@ -446,8 +447,8 @@ module Embulk
|
|
446
447
|
end
|
447
448
|
|
448
449
|
opts = {}
|
449
|
-
Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@
|
450
|
-
with_network_retry { client.insert_table(@
|
450
|
+
Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@destination_project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
|
451
|
+
with_network_retry { client.insert_table(@destination_project, dataset, body, **opts) }
|
451
452
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
452
453
|
if e.status_code == 409 && /Already Exists:/ =~ e.message
|
453
454
|
# ignore 'Already Exists' error
|
@@ -456,7 +457,7 @@ module Embulk
|
|
456
457
|
|
457
458
|
response = {status_code: e.status_code, message: e.message, error_class: e.class}
|
458
459
|
Embulk.logger.error {
|
459
|
-
"embulk-output-bigquery: insert_table(#{@
|
460
|
+
"embulk-output-bigquery: insert_table(#{@destination_project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
|
460
461
|
}
|
461
462
|
raise Error, "failed to create table #{@destination_project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
|
462
463
|
end
|
data/lib/embulk/output/bigquery/gcs_client.rb
CHANGED
@@ -48,7 +48,7 @@ module Embulk
|
|
48
48
|
opts = {}
|
49
49
|
|
50
50
|
Embulk.logger.debug { "embulk-output-bigquery: insert_temporary_bucket(#{@project}, #{body}, #{opts})" }
|
51
|
-
with_network_retry { client.insert_bucket(@project, body, opts) }
|
51
|
+
with_network_retry { client.insert_bucket(@project, body, **opts) }
|
52
52
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
53
53
|
if e.status_code == 409 && /conflict:/ =~ e.message
|
54
54
|
# ignore 'Already Exists' error
|
@@ -81,7 +81,7 @@ module Embulk
|
|
81
81
|
|
82
82
|
Embulk.logger.debug { "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts})" }
|
83
83
|
# memo: gcs is strongly consistent for insert (read-after-write). ref: https://cloud.google.com/storage/docs/consistency
|
84
|
-
with_network_retry { client.insert_object(bucket, body, opts) }
|
84
|
+
with_network_retry { client.insert_object(bucket, body, **opts) }
|
85
85
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
86
86
|
response = {status_code: e.status_code, message: e.message, error_class: e.class}
|
87
87
|
Embulk.logger.error {
|
@@ -114,7 +114,7 @@ module Embulk
|
|
114
114
|
opts = {}
|
115
115
|
|
116
116
|
Embulk.logger.debug { "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts})" }
|
117
|
-
response = with_network_retry { client.delete_object(bucket, object, opts) }
|
117
|
+
response = with_network_retry { client.delete_object(bucket, object, **opts) }
|
118
118
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
119
119
|
if e.status_code == 404 # ignore 'notFound' error
|
120
120
|
return nil
|
data/lib/embulk/output/bigquery/helper.rb
CHANGED
@@ -46,10 +46,11 @@ module Embulk
|
|
46
46
|
embulk_type = column[:type]
|
47
47
|
column_option = column_options_map[column_name] || {}
|
48
48
|
{}.tap do |field|
|
49
|
-
field[:name]
|
50
|
-
field[:type]
|
51
|
-
field[:mode]
|
52
|
-
field[:fields]
|
49
|
+
field[:name] = column_name
|
50
|
+
field[:type] = (column_option['type'] || bq_type_from_embulk_type(embulk_type)).upcase
|
51
|
+
field[:mode] = column_option['mode'] if column_option['mode']
|
52
|
+
field[:fields] = deep_symbolize_keys(column_option['fields']) if column_option['fields']
|
53
|
+
field[:description] = column_option['description'] if column_option['description']
|
53
54
|
end
|
54
55
|
end
|
55
56
|
end
|
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -63,6 +63,8 @@ module Embulk
|
|
63
63
|
'payload_column' => config.param('payload_column', :string, :default => nil),
|
64
64
|
'payload_column_index' => config.param('payload_column_index', :integer, :default => nil),
|
65
65
|
|
66
|
+
'description' => config.param('description', :string, :default => nil),
|
67
|
+
|
66
68
|
'open_timeout_sec' => config.param('open_timeout_sec', :integer, :default => nil),
|
67
69
|
'timeout_sec' => config.param('timeout_sec', :integer, :default => nil), # google-api-ruby-client < v0.11.0
|
68
70
|
'send_timeout_sec' => config.param('send_timeout_sec', :integer, :default => nil), # google-api-ruby-client >= v0.11.0
|
data/test/test_helper.rb
CHANGED
@@ -68,7 +68,7 @@ module Embulk
|
|
68
68
|
])
|
69
69
|
task = {
|
70
70
|
'column_options' => [
|
71
|
-
{'name' => 'boolean', 'type' => 'STRING', 'mode' => 'REQUIRED'},
|
71
|
+
{'name' => 'boolean', 'type' => 'STRING', 'mode' => 'REQUIRED', 'description' => 'hoge'},
|
72
72
|
{'name' => 'long', 'type' => 'STRING'},
|
73
73
|
{'name' => 'double', 'type' => 'STRING'},
|
74
74
|
{'name' => 'string', 'type' => 'INTEGER'},
|
@@ -81,7 +81,7 @@ module Embulk
|
|
81
81
|
],
|
82
82
|
}
|
83
83
|
expected = [
|
84
|
-
{name: 'boolean',
|
84
|
+
{name: 'boolean', type: 'STRING', mode: 'REQUIRED', description: 'hoge'},
|
85
85
|
{name: 'long', type: 'STRING'},
|
86
86
|
{name: 'double', type: 'STRING'},
|
87
87
|
{name: 'string', type: 'INTEGER'},
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
@@ -9,22 +9,36 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2024-
|
12
|
+
date: 2024-07-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name: google-
|
15
|
+
name: google-apis-storage_v1
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- -
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: 0
|
20
|
+
version: '0'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: google-apis-bigquery_v2
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
26
33
|
- !ruby/object:Gem::Version
|
27
|
-
version: 0
|
34
|
+
version: '0'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
28
42
|
- !ruby/object:Gem::Dependency
|
29
43
|
name: time_with_zone
|
30
44
|
requirement: !ruby/object:Gem::Requirement
|