fluent-plugin-bigquery-storage-write 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 95c405b96c77e504c30be911ef3821b2e4ff98ed817f142feb0eb927aa61b0f5
4
- data.tar.gz: 779be744e6a644f6e257f86a9840f3ce65ddef0b88325ddd3bec4723f9985f34
3
+ metadata.gz: 18320bce875e8a3f2dfa1a7e82d656f5dc334cf02c030801603a1e4d39b1a3cc
4
+ data.tar.gz: b2a01db5bc3d78d9611c33be4390579679319840626cf11c1a0040e2af61621a
5
5
  SHA512:
6
- metadata.gz: 8299958843ee11bfaaa2c06c15362173fd0c942e85e6aaa36873ac985b7669f876bcb107a72ee0ebd313b273b5da34d9f93e7e50161668fdc6bdc1fd194f35ec
7
- data.tar.gz: 8c0c2293d91382ae521c1572c57999256407f8754ee838ec81b1b1adacf65c927589db9b0af977f7f0a54b994c3842a1c0774e2a05b4c2d32af17c78fce0cfaf
6
+ metadata.gz: 228f38f0b0ffbe28701bf90bc33de292a18e7b6d5f3654a87dac6ca204cece0faa74d600410655919c119ceffc25e40a23e8a257125d3d7694d827e8cf944720
7
+ data.tar.gz: 37139f080689172c6e95ad083b11666131484febb1c2c69e8acbde6351bc04d23bffaf204d8743713d8c5dd34789277ef397bf871e467d3cfe655098ff33c417
data/Gemfile.lock CHANGED
@@ -1,8 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fluent-plugin-bigquery-storage-write (0.1.0)
4
+ fluent-plugin-bigquery-storage-write (0.2.0)
5
5
  fluentd (>= 0.14.10, < 2)
6
+ google-api-client (>= 0.53.0)
6
7
  google-cloud-bigquery-storage (>= 1.3.0)
7
8
  googleauth (>= 1.5.2)
8
9
  grpc (>= 1.55)
@@ -10,10 +11,16 @@ PATH
10
11
  GEM
11
12
  remote: https://rubygems.org/
12
13
  specs:
14
+ activesupport (7.0.5)
15
+ concurrent-ruby (~> 1.0, >= 1.0.2)
16
+ i18n (>= 1.6, < 2)
17
+ minitest (>= 5.1)
18
+ tzinfo (~> 2.0)
13
19
  addressable (2.8.4)
14
20
  public_suffix (>= 2.0.2, < 6.0)
15
21
  concurrent-ruby (1.2.2)
16
22
  cool.io (1.7.1)
23
+ declarative (0.0.20)
17
24
  faraday (2.7.6)
18
25
  faraday-net_http (>= 2.0, < 3.1)
19
26
  ruby2_keywords (>= 0.0.4)
@@ -40,6 +47,27 @@ GEM
40
47
  googleapis-common-protos-types (>= 1.3.1, < 2.a)
41
48
  googleauth (~> 1.0)
42
49
  grpc (~> 1.36)
50
+ gems (1.2.0)
51
+ google-api-client (0.53.0)
52
+ google-apis-core (~> 0.1)
53
+ google-apis-generator (~> 0.1)
54
+ google-apis-core (0.11.0)
55
+ addressable (~> 2.5, >= 2.5.1)
56
+ googleauth (>= 0.16.2, < 2.a)
57
+ httpclient (>= 2.8.1, < 3.a)
58
+ mini_mime (~> 1.0)
59
+ representable (~> 3.0)
60
+ retriable (>= 2.0, < 4.a)
61
+ rexml
62
+ webrick
63
+ google-apis-discovery_v1 (0.14.0)
64
+ google-apis-core (>= 0.11.0, < 2.a)
65
+ google-apis-generator (0.12.0)
66
+ activesupport (>= 5.0)
67
+ gems (~> 1.2)
68
+ google-apis-core (>= 0.11.0, < 2.a)
69
+ google-apis-discovery_v1 (~> 0.5)
70
+ thor (>= 0.20, < 2.a)
43
71
  google-cloud-bigquery-storage (1.3.0)
44
72
  google-cloud-bigquery-storage-v1 (>= 0.8, < 2.a)
45
73
  google-cloud-core (~> 1.6)
@@ -71,14 +99,25 @@ GEM
71
99
  googleapis-common-protos-types (~> 1.0)
72
100
  grpc-tools (1.55.0)
73
101
  http_parser.rb (0.8.0)
102
+ httpclient (2.8.3)
103
+ i18n (1.14.1)
104
+ concurrent-ruby (~> 1.0)
74
105
  jwt (2.7.1)
75
106
  memoist (0.16.2)
107
+ mini_mime (1.1.2)
108
+ minitest (5.18.0)
76
109
  msgpack (1.7.1)
77
110
  multi_json (1.15.0)
78
111
  os (1.1.4)
79
112
  power_assert (1.1.7)
80
113
  public_suffix (5.0.1)
81
114
  rake (13.0.1)
115
+ representable (3.2.0)
116
+ declarative (< 0.1.0)
117
+ trailblazer-option (>= 0.1.1, < 0.2.0)
118
+ uber (< 0.2.0)
119
+ retriable (3.1.2)
120
+ rexml (3.2.5)
82
121
  ruby2_keywords (0.0.5)
83
122
  serverengine (2.3.2)
84
123
  sigdump (~> 0.2.2)
@@ -91,10 +130,13 @@ GEM
91
130
  strptime (0.2.5)
92
131
  test-unit (3.3.4)
93
132
  power_assert
133
+ thor (1.2.2)
134
+ trailblazer-option (0.1.2)
94
135
  tzinfo (2.0.6)
95
136
  concurrent-ruby (~> 1.0)
96
137
  tzinfo-data (1.2023.3)
97
138
  tzinfo (>= 1.0.0)
139
+ uber (0.1.0)
98
140
  webrick (1.6.1)
99
141
  yajl-ruby (1.4.3)
100
142
 
data/README.md CHANGED
@@ -1,11 +1,14 @@
1
1
  # fluent-plugin-bigquery-storage-write
2
2
 
3
+ ![Test](https://github.com/gumigumi4f/fluent-plugin-bigquery-storage-write/workflows/Test/badge.svg)
4
+ [![Gem Version](https://badge.fury.io/rb/fluent-plugin-bigquery-storage-write.svg)](http://badge.fury.io/rb/fluent-plugin-bigquery-storage-write)
5
+
3
6
  [Fluentd](https://fluentd.org/) output plugin to insert data into BigQuery through the Storage Write API.
4
7
 
5
8
  ## Overview
6
9
 
7
10
  Google Cloud BigQuery output plugin for [Fluentd](https://fluentd.org/).
8
- The main difference from [fluent-plugin-bigquery](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery) is that it uses BigQuery new API `Storage Write`.
11
+ The main difference from [fluent-plugin-bigquery](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery) is that it uses BigQuery's new `Storage Write API`.
9
12
 
10
13
  Advantages of using the Storage Write API are described [here](https://cloud.google.com/bigquery/docs/write-api#advantages).
11
14
 
@@ -35,15 +38,19 @@ bundle
35
38
 
36
39
  ### bigquery_storage_write_insert
37
40
 
38
- | name | type | required? | default | description |
39
- |:---------------------------|:-------|:---------------|:--------------------|:-------------------------------------------------------------------------------------------------------------|
40
- | auth_method | enum | yes | application_default | `json_key` or `compute_engine` or `application_default` |
41
- | json_key | string | yes (json_key) | nil | GCP JSON Key file path or JSON Key string |
42
- | project | string | yes | nil | |
43
- | dataset | string | yes | nil | |
44
- | table | string | yes | nil | |
45
- | proto_schema_rb_path | string | yes | nil | Generated Protocol Buffers schema .rb file path. |
46
- | proto_message_class_name | string | no | nil | Class name of Protocol Buffers message. If not specified, table value that converted to pascal case is used. |
41
+ | name | type | required? | default | description |
42
+ |:-------------------------|:-------|:------------------|:--------------------|:-------------------------------------------------------------------------------------------------------------|
43
+ | auth_method | enum | yes | application_default | `private_key` or `json_key` or `compute_engine` or `application_default` |
44
+ | email | string | yes (private_key) | nil | GCP Service Account Email |
45
+ | private_key_path | string | yes (private_key) | nil | GCP Private Key file path |
46
+ | private_key_passphrase | string | no | notasecret | GCP Private Key Passphrase |
47
+ | json_key | string | yes (json_key) | nil | GCP JSON Key file path or JSON Key string |
48
+ | project | string | yes | nil | |
49
+ | dataset | string | yes | nil | |
50
+ | table | string | yes | nil | |
51
+ | ignore_unknown_fields | bool | no | true | If false, unknown fields raise an error. |
52
+ | proto_schema_rb_path | string | yes | nil | Generated Protocol Buffers schema .rb file path. |
53
+ | proto_message_class_name | string | no | nil | Class name of Protocol Buffers message. If not specified, table value that converted to pascal case is used. |
47
54
 
48
55
  ### buffer section
49
56
 
@@ -69,7 +76,7 @@ Write code `.proto` and compile it using `protoc`.
69
76
  Sample code with a BigQuery schema is located at `proto/test_data.proto`.
70
77
 
71
78
  ```sh
72
- bundle exec grpc_tools_ruby_protoc -I proto --ruby_out=proto proto/test_data.proto
79
+ protoc -I proto --ruby_out=proto proto/test_data.proto
73
80
  ```
74
81
 
75
82
  Next, specify generated ruby code path to fluentd configuration file.
@@ -92,11 +99,16 @@ Next, specify generated ruby code path to fluentd configuration file.
92
99
  ## Tips
93
100
 
94
101
  - Can I dynamically retrieve and use the BigQuery table schema?
95
- - No, you have to use predefined schema generated from `protoc`.
102
+ - No, you have to use predefined schema generated by `protoc`.
96
103
  - Also, you have to create the BigQuery table before using this plugin.
97
- - Where is the type conversions between Protocol Buffers and BigQuery?
98
- - https://cloud.google.com/bigquery/docs/write-api#data_type_conversions
99
- - Note that some types, including google.protobuf.Timestamp, are not available due to [BigQuery limitation](https://github.com/googleapis/python-bigquery-storage/issues/257).
104
+ - Where are the type conversion docs between Protocol Buffers and BigQuery?
105
+ - See https://cloud.google.com/bigquery/docs/write-api#data_type_conversions
106
+ - Note that some types, including `google.protobuf.Timestamp`, are not available due to a [BigQuery limitation](https://github.com/googleapis/python-bigquery-storage/issues/257).
107
+ - Which protoc version do I need for compilation?
108
+ - [Protocol Buffers v23.0](https://github.com/protocolbuffers/protobuf/releases/tag/v23.0) is the minimum version, because it generates a serialized proto instead of the DSL.
109
+ - Are there any limitations on the Storage Write API?
110
+ - See https://cloud.google.com/bigquery/quotas#write-api-limits
111
+ - In particular, note that `chunk_limit_size` can be at most 10 MB.
100
112
 
101
113
  ## Copyright
102
114
 
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = "fluent-plugin-bigquery-storage-write"
6
- spec.version = "0.1.0"
6
+ spec.version = "0.2.0"
7
7
  spec.authors = ["gumigumi4f"]
8
8
  spec.email = ["gumigumi4f@gmail.com"]
9
9
 
@@ -28,4 +28,5 @@ Gem::Specification.new do |spec|
28
28
  spec.add_runtime_dependency "grpc", ">= 1.55"
29
29
  spec.add_runtime_dependency "googleauth", ">= 1.5.2"
30
30
  spec.add_runtime_dependency "google-cloud-bigquery-storage", ">= 1.3.0"
31
+ spec.add_runtime_dependency "google-api-client", ">= 0.53.0"
31
32
  end
@@ -41,6 +41,8 @@ module Fluent
41
41
 
42
42
  def get_auth
43
43
  case @auth_method
44
+ when :private_key
45
+ get_auth_from_private_key
44
46
  when :compute_engine
45
47
  get_auth_from_compute_engine
46
48
  when :json_key
@@ -52,6 +54,22 @@ module Fluent
52
54
  end
53
55
  end
54
56
 
57
+ def get_auth_from_private_key
58
+ require 'google/api_client/auth/key_utils'
59
+ private_key_path = @options[:private_key_path]
60
+ private_key_passphrase = @options[:private_key_passphrase]
61
+ email = @options[:email]
62
+
63
+ key = Google::APIClient::KeyUtils.load_from_pkcs12(private_key_path, private_key_passphrase)
64
+ Signet::OAuth2::Client.new(
65
+ token_credential_uri: "https://accounts.google.com/o/oauth2/token",
66
+ audience: "https://accounts.google.com/o/oauth2/token",
67
+ scope: @scope,
68
+ issuer: email,
69
+ signing_key: key
70
+ )
71
+ end
72
+
55
73
  def get_auth_from_compute_engine
56
74
  Google::Auth::GCECredentials.new
57
75
  end
@@ -14,13 +14,18 @@ module Fluent
14
14
 
15
15
  helpers :inject
16
16
 
17
- config_param :auth_method, :enum, list: [:compute_engine, :json_key, :application_default], default: :application_default
17
+ config_param :auth_method, :enum, list: [:private_key, :compute_engine, :json_key, :application_default], default: :application_default
18
+ config_param :email, :string, default: nil
19
+ config_param :private_key_path, :string, default: nil
20
+ config_param :private_key_passphrase, :string, default: 'notasecret', secret: true
18
21
  config_param :json_key, default: nil, secret: true
19
22
 
20
23
  config_param :project, :string
21
24
  config_param :dataset, :string
22
25
  config_param :table, :string
23
26
 
27
+ config_param :ignore_unknown_fields, :bool, default: true
28
+
24
29
  config_param :proto_schema_rb_path, :string
25
30
  config_param :proto_message_class_name, :string, default: nil
26
31
 
@@ -39,6 +44,10 @@ module Fluent
39
44
  super
40
45
 
41
46
  case @auth_method
47
+ when :private_key
48
+ unless @email && @private_key_path
49
+ raise Fluent::ConfigError, "'email' and 'private_key_path' must be specified if auth_method == 'private_key'"
50
+ end
42
51
  when :compute_engine
43
52
  # Do nothing
44
53
  when :json_key
@@ -71,6 +80,9 @@ module Fluent
71
80
  @klass = Google::Protobuf::DescriptorPool.generated_pool.lookup(message_cls_name).msgclass
72
81
 
73
82
  @writer = Fluent::BigQuery::Storage::Writer.new(@log, @auth_method, @project, @dataset, @table, @descriptor_proto,
83
+ private_key_path: @private_key_path,
84
+ private_key_passphrase: @private_key_passphrase,
85
+ email: @email,
74
86
  json_key: @json_key)
75
87
  rescue => e
76
88
  log.error("initialize error")
@@ -93,7 +105,7 @@ module Fluent
93
105
  def write(chunk)
94
106
  rows = chunk.open do |io|
95
107
  io.map do |line|
96
- val = @klass.decode_json(line, ignore_unknown_fields: true)
108
+ val = @klass.decode_json(line, ignore_unknown_fields: @ignore_unknown_fields)
97
109
  @klass.encode(val)
98
110
  end
99
111
  end
@@ -41,6 +41,51 @@ class BigQueryStorageWriteInsertOutputTest < Test::Unit::TestCase
41
41
  assert_equal('/path/to/schema_rb', d.instance.proto_schema_rb_path)
42
42
  assert_equal('Test', d.instance.proto_message_class_name)
43
43
  end
44
+
45
+ test '"json_key" must be specified when auth_method set to "json_key"' do
46
+ assert_raises Fluent::ConfigError do
47
+ create_driver(%[
48
+ auth_method json_key
49
+
50
+ project sample-project
51
+ dataset test
52
+ table data
53
+
54
+ proto_schema_rb_path /path/to/schema_rb
55
+ proto_message_class_name Test
56
+ ])
57
+ end
58
+ end
59
+
60
+ test '"email" and "private_key_path" must be specified when auth_method set to "private_key"' do
61
+ assert_raises Fluent::ConfigError do
62
+ create_driver(%[
63
+ auth_method private_key
64
+ private_key_path /path/to/key
65
+
66
+ project sample-project
67
+ dataset test
68
+ table data
69
+
70
+ proto_schema_rb_path /path/to/schema_rb
71
+ proto_message_class_name Test
72
+ ])
73
+ end
74
+
75
+ assert_raises Fluent::ConfigError do
76
+ create_driver(%[
77
+ auth_method private_key
78
+ email hoge@hoge.com
79
+
80
+ project sample-project
81
+ dataset test
82
+ table data
83
+
84
+ proto_schema_rb_path /path/to/schema_rb
85
+ proto_message_class_name Test
86
+ ])
87
+ end
88
+ end
44
89
  end
45
90
 
46
91
  private
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-bigquery-storage-write
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - gumigumi4f
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-15 00:00:00.000000000 Z
11
+ date: 2023-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -128,6 +128,20 @@ dependencies:
128
128
  - - ">="
129
129
  - !ruby/object:Gem::Version
130
130
  version: 1.3.0
131
+ - !ruby/object:Gem::Dependency
132
+ name: google-api-client
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: 0.53.0
138
+ type: :runtime
139
+ prerelease: false
140
+ version_requirements: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: 0.53.0
131
145
  description: Fluentd plugin to insert data into BigQuery
132
146
  email:
133
147
  - gumigumi4f@gmail.com