fluent-plugin-bigquery-storage-write 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 95c405b96c77e504c30be911ef3821b2e4ff98ed817f142feb0eb927aa61b0f5
4
- data.tar.gz: 779be744e6a644f6e257f86a9840f3ce65ddef0b88325ddd3bec4723f9985f34
3
+ metadata.gz: 90b2ce80850ba17a1eb9f9f2745cd91bd6a3b876cb27340e349c3a63d5af0bc6
4
+ data.tar.gz: 1825d48e453d0723c23fccc3434cc17430acd56abcf8bb33399af973e8e48a14
5
5
  SHA512:
6
- metadata.gz: 8299958843ee11bfaaa2c06c15362173fd0c942e85e6aaa36873ac985b7669f876bcb107a72ee0ebd313b273b5da34d9f93e7e50161668fdc6bdc1fd194f35ec
7
- data.tar.gz: 8c0c2293d91382ae521c1572c57999256407f8754ee838ec81b1b1adacf65c927589db9b0af977f7f0a54b994c3842a1c0774e2a05b4c2d32af17c78fce0cfaf
6
+ metadata.gz: 954753c0de9faae62bb19653d04103f8819f23becaa6cc8f9cd4a9c29a1ca5ec1aba0901ba3bd92957c8f8e22e46e419a820bf1f11656269319d7c1e5d76dedd
7
+ data.tar.gz: 545aeb35cce6d292805790340134950e36aa88bf67567c2e8fc7ab8cabca5c256557dd71982237d19ddc91ad8458e78e8fbc7a61a4bf1ee45e21e1a74e4e74eb
data/Gemfile.lock CHANGED
@@ -1,8 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fluent-plugin-bigquery-storage-write (0.1.0)
4
+ fluent-plugin-bigquery-storage-write (0.2.0)
5
5
  fluentd (>= 0.14.10, < 2)
6
+ google-api-client (>= 0.53.0)
6
7
  google-cloud-bigquery-storage (>= 1.3.0)
7
8
  googleauth (>= 1.5.2)
8
9
  grpc (>= 1.55)
@@ -10,11 +11,17 @@ PATH
10
11
  GEM
11
12
  remote: https://rubygems.org/
12
13
  specs:
14
+ activesupport (7.0.5)
15
+ concurrent-ruby (~> 1.0, >= 1.0.2)
16
+ i18n (>= 1.6, < 2)
17
+ minitest (>= 5.1)
18
+ tzinfo (~> 2.0)
13
19
  addressable (2.8.4)
14
20
  public_suffix (>= 2.0.2, < 6.0)
15
21
  concurrent-ruby (1.2.2)
16
22
  cool.io (1.7.1)
17
- faraday (2.7.6)
23
+ declarative (0.0.20)
24
+ faraday (2.7.7)
18
25
  faraday-net_http (>= 2.0, < 3.1)
19
26
  ruby2_keywords (>= 0.0.4)
20
27
  faraday-net_http (3.0.2)
@@ -40,6 +47,27 @@ GEM
40
47
  googleapis-common-protos-types (>= 1.3.1, < 2.a)
41
48
  googleauth (~> 1.0)
42
49
  grpc (~> 1.36)
50
+ gems (1.2.0)
51
+ google-api-client (0.53.0)
52
+ google-apis-core (~> 0.1)
53
+ google-apis-generator (~> 0.1)
54
+ google-apis-core (0.11.0)
55
+ addressable (~> 2.5, >= 2.5.1)
56
+ googleauth (>= 0.16.2, < 2.a)
57
+ httpclient (>= 2.8.1, < 3.a)
58
+ mini_mime (~> 1.0)
59
+ representable (~> 3.0)
60
+ retriable (>= 2.0, < 4.a)
61
+ rexml
62
+ webrick
63
+ google-apis-discovery_v1 (0.14.0)
64
+ google-apis-core (>= 0.11.0, < 2.a)
65
+ google-apis-generator (0.12.0)
66
+ activesupport (>= 5.0)
67
+ gems (~> 1.2)
68
+ google-apis-core (>= 0.11.0, < 2.a)
69
+ google-apis-discovery_v1 (~> 0.5)
70
+ thor (>= 0.20, < 2.a)
43
71
  google-cloud-bigquery-storage (1.3.0)
44
72
  google-cloud-bigquery-storage-v1 (>= 0.8, < 2.a)
45
73
  google-cloud-core (~> 1.6)
@@ -52,7 +80,8 @@ GEM
52
80
  google-cloud-env (1.6.0)
53
81
  faraday (>= 0.17.3, < 3.0)
54
82
  google-cloud-errors (1.3.1)
55
- google-protobuf (3.23.2-arm64-darwin)
83
+ google-protobuf (3.23.3-arm64-darwin)
84
+ google-protobuf (3.23.3-x86_64-linux)
56
85
  googleapis-common-protos (1.4.0)
57
86
  google-protobuf (~> 3.14)
58
87
  googleapis-common-protos-types (~> 1.2)
@@ -66,19 +95,33 @@ GEM
66
95
  multi_json (~> 1.11)
67
96
  os (>= 0.9, < 2.0)
68
97
  signet (>= 0.16, < 2.a)
69
- grpc (1.55.0)
98
+ grpc (1.56.0)
70
99
  google-protobuf (~> 3.23)
71
100
  googleapis-common-protos-types (~> 1.0)
72
- grpc-tools (1.55.0)
101
+ grpc (1.56.0-x86_64-linux)
102
+ google-protobuf (~> 3.23)
103
+ googleapis-common-protos-types (~> 1.0)
104
+ grpc-tools (1.56.0)
73
105
  http_parser.rb (0.8.0)
106
+ httpclient (2.8.3)
107
+ i18n (1.14.1)
108
+ concurrent-ruby (~> 1.0)
74
109
  jwt (2.7.1)
75
110
  memoist (0.16.2)
111
+ mini_mime (1.1.2)
112
+ minitest (5.18.1)
76
113
  msgpack (1.7.1)
77
114
  multi_json (1.15.0)
78
115
  os (1.1.4)
79
- power_assert (1.1.7)
116
+ power_assert (2.0.3)
80
117
  public_suffix (5.0.1)
81
- rake (13.0.1)
118
+ rake (13.0.6)
119
+ representable (3.2.0)
120
+ declarative (< 0.1.0)
121
+ trailblazer-option (>= 0.1.1, < 0.2.0)
122
+ uber (< 0.2.0)
123
+ retriable (3.1.2)
124
+ rexml (3.2.5)
82
125
  ruby2_keywords (0.0.5)
83
126
  serverengine (2.3.2)
84
127
  sigdump (~> 0.2.2)
@@ -89,17 +132,21 @@ GEM
89
132
  jwt (>= 1.5, < 3.0)
90
133
  multi_json (~> 1.10)
91
134
  strptime (0.2.5)
92
- test-unit (3.3.4)
135
+ test-unit (3.6.0)
93
136
  power_assert
137
+ thor (1.2.2)
138
+ trailblazer-option (0.1.2)
94
139
  tzinfo (2.0.6)
95
140
  concurrent-ruby (~> 1.0)
96
141
  tzinfo-data (1.2023.3)
97
142
  tzinfo (>= 1.0.0)
98
- webrick (1.6.1)
143
+ uber (0.1.0)
144
+ webrick (1.8.1)
99
145
  yajl-ruby (1.4.3)
100
146
 
101
147
  PLATFORMS
102
148
  arm64-darwin-22
149
+ x86_64-linux
103
150
 
104
151
  DEPENDENCIES
105
152
  bundler
data/README.md CHANGED
@@ -1,11 +1,14 @@
1
1
  # fluent-plugin-bigquery-storage-write
2
2
 
3
+ ![Test](https://github.com/gumigumi4f/fluent-plugin-bigquery-storage-write/workflows/Test/badge.svg)
4
+ [![Gem Version](https://badge.fury.io/rb/fluent-plugin-bigquery-storage-write.svg)](http://badge.fury.io/rb/fluent-plugin-bigquery-storage-write)
5
+
3
6
  [Fluentd](https://fluentd.org/) output plugin to insert data into BigQuery through storage write api.
4
7
 
5
8
  ## Overview
6
9
 
7
10
  Google Cloud Bigquery output plugin for [Fluentd](https://fluentd.org/).
8
- The main difference from [fluent-plugin-bigquery](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery) is that it uses BigQuery new API `Storage Write`.
11
+ The main difference from [fluent-plugin-bigquery](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery) is that it uses BigQuery's newer API, the `Storage Write API`.
9
12
 
10
13
  Advantages of using the Storage Write API are described [here](https://cloud.google.com/bigquery/docs/write-api#advantages).
11
14
 
@@ -35,15 +38,19 @@ bundle
35
38
 
36
39
  ### bigquery_storage_write_insert
37
40
 
38
- | name | type | required? | default | description |
39
- |:---------------------------|:-------|:---------------|:--------------------|:-------------------------------------------------------------------------------------------------------------|
40
- | auth_method | enum | yes | application_default | `json_key` or `compute_engine` or `application_default` |
41
- | json_key | string | yes (json_key) | nil | GCP JSON Key file path or JSON Key string |
42
- | project | string | yes | nil | |
43
- | dataset | string | yes | nil | |
44
- | table | string | yes | nil | |
45
- | proto_schema_rb_path | string | yes | nil | Generated Protocol Buffers schema .rb file path. |
46
- | proto_message_class_name | string | no | nil | Class name of Protocol Buffers message. If not specified, table value that converted to pascal case is used. |
41
+ | name | type | required? | default | description |
42
+ |:-------------------------|:-------|:------------------|:--------------------|:-------------------------------------------------------------------------------------------------------------|
43
+ | auth_method | enum | yes | application_default | `private_key` or `json_key` or `compute_engine` or `application_default` |
44
+ | email | string | yes (private_key) | nil | GCP Service Account Email |
45
+ | private_key_path | string | yes (private_key) | nil | GCP Private Key file path |
46
+ | private_key_passphrase | string | no | notasecret | GCP Private Key Passphrase (used when `auth_method` is `private_key`) |
47
+ | json_key | string | yes (json_key) | nil | GCP JSON Key file path or JSON Key string |
48
+ | project | string | yes | nil | |
49
+ | dataset | string | yes | nil | |
50
+ | table | string | yes | nil | |
51
+ | ignore_unknown_fields | bool | no | true | If False, raise errors for unknown fields. |
52
+ | proto_schema_rb_path | string | yes | nil | Generated Protocol Buffers schema .rb file path. |
53
+ | proto_message_class_name | string | no | nil | Class name of the Protocol Buffers message. If not specified, the table name converted to PascalCase is used. |
47
54
 
48
55
  ### buffer section
49
56
 
@@ -69,7 +76,7 @@ Write code `.proto` and compile it using `protoc`.
69
76
  The sample code with BigQuery schema is located in the path below `proto/test_data.proto`.
70
77
 
71
78
  ```sh
72
- bundle exec grpc_tools_ruby_protoc -I proto --ruby_out=proto proto/test_data.proto
79
+ protoc -I proto --ruby_out=proto proto/test_data.proto
73
80
  ```
74
81
 
75
82
  Next, specify generated ruby code path to fluentd configuration file.
@@ -92,11 +99,16 @@ Next, specify generated ruby code path to fluentd configuration file.
92
99
  ## Tips
93
100
 
94
101
  - Can I dynamically retrieve and use the BigQuery table schema?
95
- - No, you have to use predefined schema generated from `protoc`.
102
+ - No, you have to use predefined schema generated by `protoc`.
96
103
  - Also, you have to create BigQuery table before using this plugin.
97
- - Where is the type conversions between Protocol Buffers and BigQuery?
98
- - https://cloud.google.com/bigquery/docs/write-api#data_type_conversions
99
- - Note that some types, including google.protobuf.Timestamp, are not available due to [BigQuery limitation](https://github.com/googleapis/python-bigquery-storage/issues/257).
104
+ - Where is the documentation for type conversions between Protocol Buffers and BigQuery?
105
+ - See https://cloud.google.com/bigquery/docs/write-api#data_type_conversions
106
+ - Note that some types, including `google.protobuf.Timestamp`, are not available due to [BigQuery limitation](https://github.com/googleapis/python-bigquery-storage/issues/257).
107
+ - Which protoc version do I need for compilation?
108
+ - [Protocol Buffers v23.0](https://github.com/protocolbuffers/protobuf/releases/tag/v23.0) is the minimum version, because it generates a serialized proto instead of the DSL.
109
+ - Is there any limitation on the Storage Write API?
110
+ - See https://cloud.google.com/bigquery/quotas#write-api-limits
111
+ - In particular, note that `chunk_limit_size` cannot exceed 10 MB.
100
112
 
101
113
  ## Copyright
102
114
 
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = "fluent-plugin-bigquery-storage-write"
6
- spec.version = "0.1.0"
6
+ spec.version = "0.2.1"
7
7
  spec.authors = ["gumigumi4f"]
8
8
  spec.email = ["gumigumi4f@gmail.com"]
9
9
 
@@ -28,4 +28,5 @@ Gem::Specification.new do |spec|
28
28
  spec.add_runtime_dependency "grpc", ">= 1.55"
29
29
  spec.add_runtime_dependency "googleauth", ">= 1.5.2"
30
30
  spec.add_runtime_dependency "google-cloud-bigquery-storage", ">= 1.3.0"
31
+ spec.add_runtime_dependency "google-api-client", ">= 0.53.0"
31
32
  end
@@ -15,8 +15,8 @@ module Fluent
15
15
  end
16
16
 
17
17
  def client
18
- @client ||= Google::Cloud::Bigquery::Storage::V1::BigQueryWrite::Client.new.tap do |cl|
19
- cl.configure.credentials = get_auth
18
+ @client ||= Google::Cloud::Bigquery::Storage::V1::BigQueryWrite::Client.new do |cf|
19
+ cf.credentials = get_auth
20
20
  end
21
21
  end
22
22
 
@@ -41,6 +41,8 @@ module Fluent
41
41
 
42
42
  def get_auth
43
43
  case @auth_method
44
+ when :private_key
45
+ get_auth_from_private_key
44
46
  when :compute_engine
45
47
  get_auth_from_compute_engine
46
48
  when :json_key
@@ -52,6 +54,22 @@ module Fluent
52
54
  end
53
55
  end
54
56
 
57
+ def get_auth_from_private_key
58
+ require 'google/api_client/auth/key_utils'
59
+ private_key_path = @options[:private_key_path]
60
+ private_key_passphrase = @options[:private_key_passphrase]
61
+ email = @options[:email]
62
+
63
+ key = Google::APIClient::KeyUtils.load_from_pkcs12(private_key_path, private_key_passphrase)
64
+ Signet::OAuth2::Client.new(
65
+ token_credential_uri: "https://accounts.google.com/o/oauth2/token",
66
+ audience: "https://accounts.google.com/o/oauth2/token",
67
+ scope: @scope,
68
+ issuer: email,
69
+ signing_key: key
70
+ )
71
+ end
72
+
55
73
  def get_auth_from_compute_engine
56
74
  Google::Auth::GCECredentials.new
57
75
  end
@@ -14,13 +14,18 @@ module Fluent
14
14
 
15
15
  helpers :inject
16
16
 
17
- config_param :auth_method, :enum, list: [:compute_engine, :json_key, :application_default], default: :application_default
17
+ config_param :auth_method, :enum, list: [:private_key, :compute_engine, :json_key, :application_default], default: :application_default
18
+ config_param :email, :string, default: nil
19
+ config_param :private_key_path, :string, default: nil
20
+ config_param :private_key_passphrase, :string, default: 'notasecret', secret: true
18
21
  config_param :json_key, default: nil, secret: true
19
22
 
20
23
  config_param :project, :string
21
24
  config_param :dataset, :string
22
25
  config_param :table, :string
23
26
 
27
+ config_param :ignore_unknown_fields, :bool, default: true
28
+
24
29
  config_param :proto_schema_rb_path, :string
25
30
  config_param :proto_message_class_name, :string, default: nil
26
31
 
@@ -39,6 +44,10 @@ module Fluent
39
44
  super
40
45
 
41
46
  case @auth_method
47
+ when :private_key
48
+ unless @email && @private_key_path
49
+ raise Fluent::ConfigError, "'email' and 'private_key_path' must be specified if auth_method == 'private_key'"
50
+ end
42
51
  when :compute_engine
43
52
  # Do nothing
44
53
  when :json_key
@@ -71,6 +80,9 @@ module Fluent
71
80
  @klass = Google::Protobuf::DescriptorPool.generated_pool.lookup(message_cls_name).msgclass
72
81
 
73
82
  @writer = Fluent::BigQuery::Storage::Writer.new(@log, @auth_method, @project, @dataset, @table, @descriptor_proto,
83
+ private_key_path: @private_key_path,
84
+ private_key_passphrase: @private_key_passphrase,
85
+ email: @email,
74
86
  json_key: @json_key)
75
87
  rescue => e
76
88
  log.error("initialize error")
@@ -93,7 +105,7 @@ module Fluent
93
105
  def write(chunk)
94
106
  rows = chunk.open do |io|
95
107
  io.map do |line|
96
- val = @klass.decode_json(line, ignore_unknown_fields: true)
108
+ val = @klass.decode_json(line, ignore_unknown_fields: @ignore_unknown_fields)
97
109
  @klass.encode(val)
98
110
  end
99
111
  end
@@ -41,6 +41,51 @@ class BigQueryStorageWriteInsertOutputTest < Test::Unit::TestCase
41
41
  assert_equal('/path/to/schema_rb', d.instance.proto_schema_rb_path)
42
42
  assert_equal('Test', d.instance.proto_message_class_name)
43
43
  end
44
+
45
+ test '"json_key" must be specified when auth_method set to "json_key"' do
46
+ assert_raises Fluent::ConfigError do
47
+ create_driver(%[
48
+ auth_method json_key
49
+
50
+ project sample-project
51
+ dataset test
52
+ table data
53
+
54
+ proto_schema_rb_path /path/to/schema_rb
55
+ proto_message_class_name Test
56
+ ])
57
+ end
58
+ end
59
+
60
+ test '"email" and "private_key_path" must be specified when auth_method set to "private_key"' do
61
+ assert_raises Fluent::ConfigError do
62
+ create_driver(%[
63
+ auth_method private_key
64
+ private_key_path /path/to/key
65
+
66
+ project sample-project
67
+ dataset test
68
+ table data
69
+
70
+ proto_schema_rb_path /path/to/schema_rb
71
+ proto_message_class_name Test
72
+ ])
73
+ end
74
+
75
+ assert_raises Fluent::ConfigError do
76
+ create_driver(%[
77
+ auth_method private_key
78
+ email hoge@hoge.com
79
+
80
+ project sample-project
81
+ dataset test
82
+ table data
83
+
84
+ proto_schema_rb_path /path/to/schema_rb
85
+ proto_message_class_name Test
86
+ ])
87
+ end
88
+ end
44
89
  end
45
90
 
46
91
  private
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-bigquery-storage-write
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - gumigumi4f
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-15 00:00:00.000000000 Z
11
+ date: 2023-06-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -128,6 +128,20 @@ dependencies:
128
128
  - - ">="
129
129
  - !ruby/object:Gem::Version
130
130
  version: 1.3.0
131
+ - !ruby/object:Gem::Dependency
132
+ name: google-api-client
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: 0.53.0
138
+ type: :runtime
139
+ prerelease: false
140
+ version_requirements: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: 0.53.0
131
145
  description: Fluentd plugin to insert data into BigQuery
132
146
  email:
133
147
  - gumigumi4f@gmail.com
@@ -170,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
170
184
  - !ruby/object:Gem::Version
171
185
  version: '0'
172
186
  requirements: []
173
- rubygems_version: 3.1.6
187
+ rubygems_version: 3.3.26
174
188
  signing_key:
175
189
  specification_version: 4
176
190
  summary: Fluentd output plugin to insert data into BigQuery through storage write