fluent-plugin-bigquery-storage-write 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +56 -9
- data/README.md +27 -15
- data/fluent-plugin-bigquery-storage-write.gemspec +2 -1
- data/lib/fluent/plugin/bigquery/storage/writer.rb +20 -2
- data/lib/fluent/plugin/out_bigquery_storage_write_insert.rb +14 -2
- data/test/plugin/test_out_bigquery_storage_write_insert.rb +45 -0
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90b2ce80850ba17a1eb9f9f2745cd91bd6a3b876cb27340e349c3a63d5af0bc6
|
4
|
+
data.tar.gz: 1825d48e453d0723c23fccc3434cc17430acd56abcf8bb33399af973e8e48a14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 954753c0de9faae62bb19653d04103f8819f23becaa6cc8f9cd4a9c29a1ca5ec1aba0901ba3bd92957c8f8e22e46e419a820bf1f11656269319d7c1e5d76dedd
|
7
|
+
data.tar.gz: 545aeb35cce6d292805790340134950e36aa88bf67567c2e8fc7ab8cabca5c256557dd71982237d19ddc91ad8458e78e8fbc7a61a4bf1ee45e21e1a74e4e74eb
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fluent-plugin-bigquery-storage-write (0.1.0)
|
4
|
+
fluent-plugin-bigquery-storage-write (0.2.0)
|
5
5
|
fluentd (>= 0.14.10, < 2)
|
6
|
+
google-api-client (>= 0.53.0)
|
6
7
|
google-cloud-bigquery-storage (>= 1.3.0)
|
7
8
|
googleauth (>= 1.5.2)
|
8
9
|
grpc (>= 1.55)
|
@@ -10,11 +11,17 @@ PATH
|
|
10
11
|
GEM
|
11
12
|
remote: https://rubygems.org/
|
12
13
|
specs:
|
14
|
+
activesupport (7.0.5)
|
15
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
16
|
+
i18n (>= 1.6, < 2)
|
17
|
+
minitest (>= 5.1)
|
18
|
+
tzinfo (~> 2.0)
|
13
19
|
addressable (2.8.4)
|
14
20
|
public_suffix (>= 2.0.2, < 6.0)
|
15
21
|
concurrent-ruby (1.2.2)
|
16
22
|
cool.io (1.7.1)
|
17
|
-
|
23
|
+
declarative (0.0.20)
|
24
|
+
faraday (2.7.7)
|
18
25
|
faraday-net_http (>= 2.0, < 3.1)
|
19
26
|
ruby2_keywords (>= 0.0.4)
|
20
27
|
faraday-net_http (3.0.2)
|
@@ -40,6 +47,27 @@ GEM
|
|
40
47
|
googleapis-common-protos-types (>= 1.3.1, < 2.a)
|
41
48
|
googleauth (~> 1.0)
|
42
49
|
grpc (~> 1.36)
|
50
|
+
gems (1.2.0)
|
51
|
+
google-api-client (0.53.0)
|
52
|
+
google-apis-core (~> 0.1)
|
53
|
+
google-apis-generator (~> 0.1)
|
54
|
+
google-apis-core (0.11.0)
|
55
|
+
addressable (~> 2.5, >= 2.5.1)
|
56
|
+
googleauth (>= 0.16.2, < 2.a)
|
57
|
+
httpclient (>= 2.8.1, < 3.a)
|
58
|
+
mini_mime (~> 1.0)
|
59
|
+
representable (~> 3.0)
|
60
|
+
retriable (>= 2.0, < 4.a)
|
61
|
+
rexml
|
62
|
+
webrick
|
63
|
+
google-apis-discovery_v1 (0.14.0)
|
64
|
+
google-apis-core (>= 0.11.0, < 2.a)
|
65
|
+
google-apis-generator (0.12.0)
|
66
|
+
activesupport (>= 5.0)
|
67
|
+
gems (~> 1.2)
|
68
|
+
google-apis-core (>= 0.11.0, < 2.a)
|
69
|
+
google-apis-discovery_v1 (~> 0.5)
|
70
|
+
thor (>= 0.20, < 2.a)
|
43
71
|
google-cloud-bigquery-storage (1.3.0)
|
44
72
|
google-cloud-bigquery-storage-v1 (>= 0.8, < 2.a)
|
45
73
|
google-cloud-core (~> 1.6)
|
@@ -52,7 +80,8 @@ GEM
|
|
52
80
|
google-cloud-env (1.6.0)
|
53
81
|
faraday (>= 0.17.3, < 3.0)
|
54
82
|
google-cloud-errors (1.3.1)
|
55
|
-
google-protobuf (3.23.
|
83
|
+
google-protobuf (3.23.3-arm64-darwin)
|
84
|
+
google-protobuf (3.23.3-x86_64-linux)
|
56
85
|
googleapis-common-protos (1.4.0)
|
57
86
|
google-protobuf (~> 3.14)
|
58
87
|
googleapis-common-protos-types (~> 1.2)
|
@@ -66,19 +95,33 @@ GEM
|
|
66
95
|
multi_json (~> 1.11)
|
67
96
|
os (>= 0.9, < 2.0)
|
68
97
|
signet (>= 0.16, < 2.a)
|
69
|
-
grpc (1.
|
98
|
+
grpc (1.56.0)
|
70
99
|
google-protobuf (~> 3.23)
|
71
100
|
googleapis-common-protos-types (~> 1.0)
|
72
|
-
grpc
|
101
|
+
grpc (1.56.0-x86_64-linux)
|
102
|
+
google-protobuf (~> 3.23)
|
103
|
+
googleapis-common-protos-types (~> 1.0)
|
104
|
+
grpc-tools (1.56.0)
|
73
105
|
http_parser.rb (0.8.0)
|
106
|
+
httpclient (2.8.3)
|
107
|
+
i18n (1.14.1)
|
108
|
+
concurrent-ruby (~> 1.0)
|
74
109
|
jwt (2.7.1)
|
75
110
|
memoist (0.16.2)
|
111
|
+
mini_mime (1.1.2)
|
112
|
+
minitest (5.18.1)
|
76
113
|
msgpack (1.7.1)
|
77
114
|
multi_json (1.15.0)
|
78
115
|
os (1.1.4)
|
79
|
-
power_assert (
|
116
|
+
power_assert (2.0.3)
|
80
117
|
public_suffix (5.0.1)
|
81
|
-
rake (13.0.
|
118
|
+
rake (13.0.6)
|
119
|
+
representable (3.2.0)
|
120
|
+
declarative (< 0.1.0)
|
121
|
+
trailblazer-option (>= 0.1.1, < 0.2.0)
|
122
|
+
uber (< 0.2.0)
|
123
|
+
retriable (3.1.2)
|
124
|
+
rexml (3.2.5)
|
82
125
|
ruby2_keywords (0.0.5)
|
83
126
|
serverengine (2.3.2)
|
84
127
|
sigdump (~> 0.2.2)
|
@@ -89,17 +132,21 @@ GEM
|
|
89
132
|
jwt (>= 1.5, < 3.0)
|
90
133
|
multi_json (~> 1.10)
|
91
134
|
strptime (0.2.5)
|
92
|
-
test-unit (3.
|
135
|
+
test-unit (3.6.0)
|
93
136
|
power_assert
|
137
|
+
thor (1.2.2)
|
138
|
+
trailblazer-option (0.1.2)
|
94
139
|
tzinfo (2.0.6)
|
95
140
|
concurrent-ruby (~> 1.0)
|
96
141
|
tzinfo-data (1.2023.3)
|
97
142
|
tzinfo (>= 1.0.0)
|
98
|
-
|
143
|
+
uber (0.1.0)
|
144
|
+
webrick (1.8.1)
|
99
145
|
yajl-ruby (1.4.3)
|
100
146
|
|
101
147
|
PLATFORMS
|
102
148
|
arm64-darwin-22
|
149
|
+
x86_64-linux
|
103
150
|
|
104
151
|
DEPENDENCIES
|
105
152
|
bundler
|
data/README.md
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
# fluent-plugin-bigquery-storage-write
|
2
2
|
|
3
|
+

|
4
|
+
[](http://badge.fury.io/rb/fluent-plugin-bigquery-storage-write)
|
5
|
+
|
3
6
|
[Fluentd](https://fluentd.org/) output plugin to insert data into BigQuery through storage write api.
|
4
7
|
|
5
8
|
## Overview
|
6
9
|
|
7
10
|
Google Cloud Bigquery output plugin for [Fluentd](https://fluentd.org/).
|
8
|
-
The main difference from [fluent-plugin-bigquery](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery) is that it uses BigQuery new API `Storage Write`.
|
11
|
+
The main difference from [fluent-plugin-bigquery](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery) is that it uses BigQuery new API called `Storage Write API`.
|
9
12
|
|
10
13
|
Advantages of using the Storage Write API are described [here](https://cloud.google.com/bigquery/docs/write-api#advantages).
|
11
14
|
|
@@ -35,15 +38,19 @@ bundle
|
|
35
38
|
|
36
39
|
### bigquery_storage_write_insert
|
37
40
|
|
38
|
-
| name
|
39
|
-
|
40
|
-
| auth_method
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
41
|
+
| name | type | required? | default | description |
|
42
|
+
|:-------------------------|:-------|:------------------|:--------------------|:-------------------------------------------------------------------------------------------------------------|
|
43
|
+
| auth_method | enum | yes | application_default | `private_key` or `json_key` or `compute_engine` or `application_default` |
|
44
|
+
| email | string | yes (private_key) | nil | GCP Service Account Email |
|
45
|
+
| private_key_path | string | yes (private_key) | nil | GCP Private Key file path |
|
46
|
+
| private_key_passphrase | string | yes (private_key) | nil | GCP Private Key Passphrase |
|
47
|
+
| json_key | string | yes (json_key) | nil | GCP JSON Key file path or JSON Key string |
|
48
|
+
| project | string | yes | nil | |
|
49
|
+
| dataset | string | yes | nil | |
|
50
|
+
| table | string | yes | nil | |
|
51
|
+
| ignore_unknown_fields | bool | no | true | If False, raise errors for unknown fields. |
|
52
|
+
| proto_schema_rb_path | string | yes | nil | Generated Protocol Buffers schema .rb file path. |
|
53
|
+
| proto_message_class_name | string | no | nil | Class name of Protocol Buffers message. If not specified, table value that converted to pascal case is used. |
|
47
54
|
|
48
55
|
### buffer section
|
49
56
|
|
@@ -69,7 +76,7 @@ Write code `.proto` and compile it using `protoc`.
|
|
69
76
|
The sample code with BigQuery schema is located in the path below `proto/test_data.proto`.
|
70
77
|
|
71
78
|
```sh
|
72
|
-
|
79
|
+
protoc -I proto --ruby_out=proto proto/test_data.proto
|
73
80
|
```
|
74
81
|
|
75
82
|
Next, specify generated ruby code path to fluentd configuration file.
|
@@ -92,11 +99,16 @@ Next, specify generated ruby code path to fluentd configuration file.
|
|
92
99
|
## Tips
|
93
100
|
|
94
101
|
- Can I dynamically retrieve and use the BigQuery table schema?
|
95
|
-
- No, you have to use predefined schema generated
|
102
|
+
- No, you have to use predefined schema generated by `protoc`.
|
96
103
|
- Also, you have to create BigQuery table before using this plugin.
|
97
|
-
- Where is the type conversions between Protocol Buffers and BigQuery?
|
98
|
-
- https://cloud.google.com/bigquery/docs/write-api#data_type_conversions
|
99
|
-
- Note that some types, including google.protobuf.Timestamp
|
104
|
+
- Where is the type conversions docs between Protocol Buffers and BigQuery?
|
105
|
+
- See https://cloud.google.com/bigquery/docs/write-api#data_type_conversions
|
106
|
+
- Note that some types, including `google.protobuf.Timestamp`, are not available due to [BigQuery limitation](https://github.com/googleapis/python-bigquery-storage/issues/257).
|
107
|
+
- Which protoc version do I need for compilation?
|
108
|
+
- [Protocol Buffers v23.0](https://github.com/protocolbuffers/protobuf/releases/tag/v23.0) is minimum version because it generates a serialized proto instead of the DSL.
|
109
|
+
- Is there any limitation on the Storage Write API?
|
110
|
+
- See https://cloud.google.com/bigquery/quotas?hl=ja#write-api-limits
|
111
|
+
- Especially, note that the maximum value of chunk_limit_size is limited to 10 MB.
|
100
112
|
|
101
113
|
## Copyright
|
102
114
|
|
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |spec|
|
5
5
|
spec.name = "fluent-plugin-bigquery-storage-write"
|
6
|
-
spec.version = "0.1.0"
|
6
|
+
spec.version = "0.2.1"
|
7
7
|
spec.authors = ["gumigumi4f"]
|
8
8
|
spec.email = ["gumigumi4f@gmail.com"]
|
9
9
|
|
@@ -28,4 +28,5 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_runtime_dependency "grpc", ">= 1.55"
|
29
29
|
spec.add_runtime_dependency "googleauth", ">= 1.5.2"
|
30
30
|
spec.add_runtime_dependency "google-cloud-bigquery-storage", ">= 1.3.0"
|
31
|
+
spec.add_runtime_dependency "google-api-client", ">= 0.53.0"
|
31
32
|
end
|
@@ -15,8 +15,8 @@ module Fluent
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def client
|
18
|
-
@client ||= Google::Cloud::Bigquery::Storage::V1::BigQueryWrite::Client.new
|
19
|
-
|
18
|
+
@client ||= Google::Cloud::Bigquery::Storage::V1::BigQueryWrite::Client.new do |cf|
|
19
|
+
cf.credentials = get_auth
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
@@ -41,6 +41,8 @@ module Fluent
|
|
41
41
|
|
42
42
|
def get_auth
|
43
43
|
case @auth_method
|
44
|
+
when :private_key
|
45
|
+
get_auth_from_private_key
|
44
46
|
when :compute_engine
|
45
47
|
get_auth_from_compute_engine
|
46
48
|
when :json_key
|
@@ -52,6 +54,22 @@ module Fluent
|
|
52
54
|
end
|
53
55
|
end
|
54
56
|
|
57
|
+
def get_auth_from_private_key
|
58
|
+
require 'google/api_client/auth/key_utils'
|
59
|
+
private_key_path = @options[:private_key_path]
|
60
|
+
private_key_passphrase = @options[:private_key_passphrase]
|
61
|
+
email = @options[:email]
|
62
|
+
|
63
|
+
key = Google::APIClient::KeyUtils.load_from_pkcs12(private_key_path, private_key_passphrase)
|
64
|
+
Signet::OAuth2::Client.new(
|
65
|
+
token_credential_uri: "https://accounts.google.com/o/oauth2/token",
|
66
|
+
audience: "https://accounts.google.com/o/oauth2/token",
|
67
|
+
scope: @scope,
|
68
|
+
issuer: email,
|
69
|
+
signing_key: key
|
70
|
+
)
|
71
|
+
end
|
72
|
+
|
55
73
|
def get_auth_from_compute_engine
|
56
74
|
Google::Auth::GCECredentials.new
|
57
75
|
end
|
@@ -14,13 +14,18 @@ module Fluent
|
|
14
14
|
|
15
15
|
helpers :inject
|
16
16
|
|
17
|
-
config_param :auth_method, :enum, list: [:compute_engine, :json_key, :application_default], default: :application_default
|
17
|
+
config_param :auth_method, :enum, list: [:private_key, :compute_engine, :json_key, :application_default], default: :application_default
|
18
|
+
config_param :email, :string, default: nil
|
19
|
+
config_param :private_key_path, :string, default: nil
|
20
|
+
config_param :private_key_passphrase, :string, default: 'notasecret', secret: true
|
18
21
|
config_param :json_key, default: nil, secret: true
|
19
22
|
|
20
23
|
config_param :project, :string
|
21
24
|
config_param :dataset, :string
|
22
25
|
config_param :table, :string
|
23
26
|
|
27
|
+
config_param :ignore_unknown_fields, :bool, default: true
|
28
|
+
|
24
29
|
config_param :proto_schema_rb_path, :string
|
25
30
|
config_param :proto_message_class_name, :string, default: nil
|
26
31
|
|
@@ -39,6 +44,10 @@ module Fluent
|
|
39
44
|
super
|
40
45
|
|
41
46
|
case @auth_method
|
47
|
+
when :private_key
|
48
|
+
unless @email && @private_key_path
|
49
|
+
raise Fluent::ConfigError, "'email' and 'private_key_path' must be specified if auth_method == 'private_key'"
|
50
|
+
end
|
42
51
|
when :compute_engine
|
43
52
|
# Do nothing
|
44
53
|
when :json_key
|
@@ -71,6 +80,9 @@ module Fluent
|
|
71
80
|
@klass = Google::Protobuf::DescriptorPool.generated_pool.lookup(message_cls_name).msgclass
|
72
81
|
|
73
82
|
@writer = Fluent::BigQuery::Storage::Writer.new(@log, @auth_method, @project, @dataset, @table, @descriptor_proto,
|
83
|
+
private_key_path: @private_key_path,
|
84
|
+
private_key_passphrase: @private_key_passphrase,
|
85
|
+
email: @email,
|
74
86
|
json_key: @json_key)
|
75
87
|
rescue => e
|
76
88
|
log.error("initialize error")
|
@@ -93,7 +105,7 @@ module Fluent
|
|
93
105
|
def write(chunk)
|
94
106
|
rows = chunk.open do |io|
|
95
107
|
io.map do |line|
|
96
|
-
val = @klass.decode_json(line, ignore_unknown_fields:
|
108
|
+
val = @klass.decode_json(line, ignore_unknown_fields: @ignore_unknown_fields)
|
97
109
|
@klass.encode(val)
|
98
110
|
end
|
99
111
|
end
|
@@ -41,6 +41,51 @@ class BigQueryStorageWriteInsertOutputTest < Test::Unit::TestCase
|
|
41
41
|
assert_equal('/path/to/schema_rb', d.instance.proto_schema_rb_path)
|
42
42
|
assert_equal('Test', d.instance.proto_message_class_name)
|
43
43
|
end
|
44
|
+
|
45
|
+
test '"json_key" must be specified when auth_method set to "json_key"' do
|
46
|
+
assert_raises Fluent::ConfigError do
|
47
|
+
create_driver(%[
|
48
|
+
auth_method json_key
|
49
|
+
|
50
|
+
project sample-project
|
51
|
+
dataset test
|
52
|
+
table data
|
53
|
+
|
54
|
+
proto_schema_rb_path /path/to/schema_rb
|
55
|
+
proto_message_class_name Test
|
56
|
+
])
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
test '"email" and "private_key_path" must be specified when auth_method set to "private_key"' do
|
61
|
+
assert_raises Fluent::ConfigError do
|
62
|
+
create_driver(%[
|
63
|
+
auth_method private_key
|
64
|
+
private_key_path /path/to/key
|
65
|
+
|
66
|
+
project sample-project
|
67
|
+
dataset test
|
68
|
+
table data
|
69
|
+
|
70
|
+
proto_schema_rb_path /path/to/schema_rb
|
71
|
+
proto_message_class_name Test
|
72
|
+
])
|
73
|
+
end
|
74
|
+
|
75
|
+
assert_raises Fluent::ConfigError do
|
76
|
+
create_driver(%[
|
77
|
+
auth_method private_key
|
78
|
+
email hoge@hoge.com
|
79
|
+
|
80
|
+
project sample-project
|
81
|
+
dataset test
|
82
|
+
table data
|
83
|
+
|
84
|
+
proto_schema_rb_path /path/to/schema_rb
|
85
|
+
proto_message_class_name Test
|
86
|
+
])
|
87
|
+
end
|
88
|
+
end
|
44
89
|
end
|
45
90
|
|
46
91
|
private
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-bigquery-storage-write
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- gumigumi4f
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -128,6 +128,20 @@ dependencies:
|
|
128
128
|
- - ">="
|
129
129
|
- !ruby/object:Gem::Version
|
130
130
|
version: 1.3.0
|
131
|
+
- !ruby/object:Gem::Dependency
|
132
|
+
name: google-api-client
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - ">="
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: 0.53.0
|
138
|
+
type: :runtime
|
139
|
+
prerelease: false
|
140
|
+
version_requirements: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - ">="
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: 0.53.0
|
131
145
|
description: Fluentd plugin to insert data into BigQuery
|
132
146
|
email:
|
133
147
|
- gumigumi4f@gmail.com
|
@@ -170,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
170
184
|
- !ruby/object:Gem::Version
|
171
185
|
version: '0'
|
172
186
|
requirements: []
|
173
|
-
rubygems_version: 3.
|
187
|
+
rubygems_version: 3.3.26
|
174
188
|
signing_key:
|
175
189
|
specification_version: 4
|
176
190
|
summary: Fluentd output plugin to insert data into BigQuery through storage write
|