fluent-plugin-bigquery 3.1.0 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +2 -2
- data/.github/workflows/windows.yml +2 -2
- data/.gitignore +1 -0
- data/CHANGELOG.md +8 -0
- data/README.md +20 -18
- data/lib/fluent/plugin/bigquery/schema.rb +11 -0
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery_base.rb +1 -1
- data/test/plugin/test_record_schema.rb +21 -6
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 762f6e4f78f96c8c6912dbdd5780aa9b79902bbe7ca2df7dee60dcc0897c0e80
|
4
|
+
data.tar.gz: ff7e0e61dddb066bdeced537521255743aa1b98cbc2e539a1e2076e4ef93d9ae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e1bd8c1cfca5dd43bebb2e9b4e2b4e2630c9e6176e1a03606b3ac2c289f23e049d4b8333bc39fbbc0142cb4dc66de2437fdd1e46c2c69c283e6e8db895f7ca6
|
7
|
+
data.tar.gz: 0a55797f85a64d787020c443041fb3ee2f08525238122ab1ff51f901467c2a349fa3678726e1ac09e25bcb97b141c5168f66192bcd40319355e6dd212793db94
|
data/.github/workflows/linux.yml
CHANGED
@@ -9,9 +9,10 @@ jobs:
|
|
9
9
|
fail-fast: false
|
10
10
|
matrix:
|
11
11
|
ruby:
|
12
|
-
- 2.7
|
13
12
|
- 3.0
|
14
13
|
- 3.1
|
14
|
+
- 3.2
|
15
|
+
- 3.3
|
15
16
|
os:
|
16
17
|
- ubuntu-latest
|
17
18
|
name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
|
@@ -25,6 +26,5 @@ jobs:
|
|
25
26
|
CI: true
|
26
27
|
run: |
|
27
28
|
ruby -v
|
28
|
-
gem install bundler rake
|
29
29
|
bundle install --jobs 4 --retry 3
|
30
30
|
bundle exec rake test
|
@@ -9,9 +9,10 @@ jobs:
|
|
9
9
|
fail-fast: false
|
10
10
|
matrix:
|
11
11
|
ruby:
|
12
|
-
- 2.7
|
13
12
|
- 3.0
|
14
13
|
- 3.1
|
14
|
+
- 3.2
|
15
|
+
- 3.3
|
15
16
|
os:
|
16
17
|
- windows-latest
|
17
18
|
name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
|
@@ -25,6 +26,5 @@ jobs:
|
|
25
26
|
CI: true
|
26
27
|
run: |
|
27
28
|
ruby -v
|
28
|
-
gem install bundler rake
|
29
29
|
bundle install --jobs 4 --retry 3
|
30
30
|
bundle exec rake test
|
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
## [v3.1.0](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/compare/v3.0.1...v3.1.0) (2022-12-16)
|
2
|
+
|
3
|
+
|
4
|
+
### Features
|
5
|
+
|
6
|
+
* Support GEOGRAPHY type field ([#201](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/201)) ([734faa9](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/734faa9adb7cec1ed579fc6a0bd9ce72d48b82d0))
|
7
|
+
* Support JSON type field ([#204](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/204)) ([ec62bfa](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/ec62bfa2f858feb440e8bb8e8f8d6b8689f709bb))
|
8
|
+
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ If you use official alpine based fluentd docker image (https://github.com/fluent
|
|
30
30
|
You need to install `bigdecimal` gem on your own dockerfile.
|
31
31
|
Because alpine based image has only minimal ruby environment in order to reduce image size.
|
32
32
|
And in most cases, a dependency on an embedded gem is not declared in the gemspec.
|
33
|
-
Because embbeded gem dependency sometimes restricts ruby environment.
|
33
|
+
Because embedded gem dependency sometimes restricts ruby environment.
|
34
34
|
|
35
35
|
## Configuration
|
36
36
|
|
@@ -40,7 +40,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
|
|
40
40
|
|
41
41
|
| name | type | required? | placeholder? | default | description |
|
42
42
|
| :-------------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
|
43
|
-
| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default`
|
43
|
+
| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` (GKE Workload Identity) |
|
44
44
|
| email | string | yes (private_key) | no | nil | GCP Service Account Email |
|
45
45
|
| private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
|
46
46
|
| private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
|
@@ -59,7 +59,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
|
|
59
59
|
| schema_cache_expire | integer | no | no | 600 | Value is in seconds. If the current time is past the expiration interval, the table schema definition is re-fetched. |
|
60
60
|
| request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
|
61
61
|
| request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
|
62
|
-
| time_partitioning_type | enum | no (either day)
|
62
|
+
| time_partitioning_type | enum | no (either day or hour) | no | nil | Type of bigquery time partitioning feature. |
|
63
63
|
| time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition. |
|
64
64
|
| time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. |
|
65
65
|
| clustering_fields | array(string) | no | no | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
|
@@ -194,15 +194,15 @@ For high rate inserts over streaming inserts, you should specify flush intervals
|
|
194
194
|
```apache
|
195
195
|
<match dummy>
|
196
196
|
@type bigquery_insert
|
197
|
-
|
197
|
+
|
198
198
|
<buffer>
|
199
199
|
flush_interval 0.1 # flush as frequent as possible
|
200
|
-
|
200
|
+
|
201
201
|
total_limit_size 10g
|
202
|
-
|
202
|
+
|
203
203
|
flush_thread_count 16
|
204
204
|
</buffer>
|
205
|
-
|
205
|
+
|
206
206
|
auth_method private_key # default
|
207
207
|
email xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxx@developer.gserviceaccount.com
|
208
208
|
private_key_path /home/username/.keys/00000000000000000000000000000000-privatekey.p12
|
@@ -255,7 +255,7 @@ Important options for high rate events are:
|
|
255
255
|
* threads for insert api calls in parallel
|
256
256
|
* specify this option for 100 or more records per second
|
257
257
|
* 10 or more threads seems good for inserts over internet
|
258
|
-
*
|
258
|
+
* fewer threads may be good for Google Compute Engine instances (with low latency for BigQuery)
|
259
259
|
* `buffer/flush_interval`
|
260
260
|
* interval between data flushes (default 0.25)
|
261
261
|
* you can set subsecond values such as `0.15` on Fluentd v0.10.42 or later
|
@@ -294,7 +294,7 @@ There are four methods supported to fetch access token for the service account.
|
|
294
294
|
1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
|
295
295
|
2. JSON key of GCP(Google Cloud Platform)'s service account
|
296
296
|
3. Predefined access token (Compute Engine only)
|
297
|
-
4. Google application default credentials
|
297
|
+
4. [Google application default credentials](https://cloud.google.com/docs/authentication/application-default-credentials) / GKE Workload Identity
|
298
298
|
|
299
299
|
#### Public-Private key pair of GCP's service account
|
300
300
|
|
@@ -339,7 +339,7 @@ You need to only include `private_key` and `client_email` key from JSON key file
|
|
339
339
|
|
340
340
|
#### Predefined access token (Compute Engine only)
|
341
341
|
|
342
|
-
When you run fluentd on
|
342
|
+
When you run fluentd on Google Compute Engine instance,
|
343
343
|
you don't need to explicitly create a service account for fluentd.
|
344
344
|
In this authentication method, you need to add the API scope "https://www.googleapis.com/auth/bigquery" to the scope list of your
|
345
345
|
Compute Engine instance, then you can configure fluentd like this.
|
@@ -360,14 +360,16 @@ Compute Engine instance, then you can configure fluentd like this.
|
|
360
360
|
|
361
361
|
#### Application default credentials
|
362
362
|
|
363
|
-
The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at
|
363
|
+
The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at https://cloud.google.com/docs/authentication/application-default-credentials.
|
364
|
+
|
365
|
+
**This is the method you should choose if you want to use Workload Identity on GKE**.
|
364
366
|
|
365
367
|
In this authentication method, the credentials returned are determined by the environment the code is running in. Conditions are checked in the following order:
|
366
368
|
|
367
369
|
1. The environment variable `GOOGLE_APPLICATION_CREDENTIALS` is checked. If this variable is specified it should point to a JSON key file that defines the credentials.
|
368
|
-
2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If
|
369
|
-
3. Well known path is checked. If file
|
370
|
-
4. System default path is checked. If file
|
370
|
+
2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If these variables are specified `GOOGLE_PRIVATE_KEY` should point to `private_key`, `GOOGLE_CLIENT_EMAIL` should point to `client_email` in a JSON key.
|
371
|
+
3. Well known path is checked. If the file exists, it is used as a JSON key file. This path is `$HOME/.config/gcloud/application_default_credentials.json`.
|
372
|
+
4. System default path is checked. If the file exists, it is used as a JSON key file. This path is `/etc/google/auth/application_default_credentials.json`.
|
371
373
|
5. If you are running in Google Compute Engine production, the built-in service account associated with the virtual machine instance will be used.
|
372
374
|
6. If none of these conditions is true, an error will occur.
|
373
375
|
|
@@ -543,11 +545,11 @@ The second method is to specify a path to a BigQuery schema file instead of list
|
|
543
545
|
@type bigquery_insert
|
544
546
|
|
545
547
|
...
|
546
|
-
|
548
|
+
|
547
549
|
schema_path /path/to/httpd.schema
|
548
550
|
</match>
|
549
551
|
```
|
550
|
-
where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full
|
552
|
+
where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full flexibility.
|
551
553
|
|
552
554
|
The third method is to set `fetch_schema` to `true` to enable fetching a schema using the BigQuery API. In this case, your fluent.conf looks like:
|
553
555
|
|
@@ -556,7 +558,7 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
|
|
556
558
|
@type bigquery_insert
|
557
559
|
|
558
560
|
...
|
559
|
-
|
561
|
+
|
560
562
|
fetch_schema true
|
561
563
|
# fetch_schema_table other_table # if you want to fetch schema from other table
|
562
564
|
</match>
|
@@ -594,5 +596,5 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p
|
|
594
596
|
## Authors
|
595
597
|
|
596
598
|
* @tagomoris: First author, original version
|
597
|
-
* KAIZEN platform Inc.:
|
599
|
+
* KAIZEN platform Inc.: Maintainer, Since 2014.08.19
|
598
600
|
* @joker1007
|
@@ -116,6 +116,16 @@ module Fluent
|
|
116
116
|
end
|
117
117
|
end
|
118
118
|
|
119
|
+
class BigNumericFieldSchema < FieldSchema
|
120
|
+
def type
|
121
|
+
:bignumeric
|
122
|
+
end
|
123
|
+
|
124
|
+
def format_one(value, is_load: false)
|
125
|
+
value.to_s
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
119
129
|
class BooleanFieldSchema < FieldSchema
|
120
130
|
def type
|
121
131
|
:boolean
|
@@ -200,6 +210,7 @@ module Fluent
|
|
200
210
|
integer: IntegerFieldSchema,
|
201
211
|
float: FloatFieldSchema,
|
202
212
|
numeric: NumericFieldSchema,
|
213
|
+
bignumeric: BigNumericFieldSchema,
|
203
214
|
boolean: BooleanFieldSchema,
|
204
215
|
timestamp: TimestampFieldSchema,
|
205
216
|
date: DateFieldSchema,
|
@@ -69,7 +69,7 @@ module Fluent
|
|
69
69
|
config_param :request_open_timeout_sec, :time, default: 60
|
70
70
|
|
71
71
|
## Partitioning
|
72
|
-
config_param :time_partitioning_type, :enum, list: [:day], default: nil
|
72
|
+
config_param :time_partitioning_type, :enum, list: [:day, :hour], default: nil
|
73
73
|
config_param :time_partitioning_field, :string, default: nil
|
74
74
|
config_param :time_partitioning_expiration, :time, default: nil
|
75
75
|
|
@@ -29,9 +29,14 @@ class RecordSchemaTest < Test::Unit::TestCase
|
|
29
29
|
"mode" => "REPEATED"
|
30
30
|
},
|
31
31
|
{
|
32
|
-
"name" => "
|
32
|
+
"name" => "utilization",
|
33
33
|
"type" => "NUMERIC",
|
34
34
|
"mode" => "NULLABLE"
|
35
|
+
},
|
36
|
+
{
|
37
|
+
"name" => "bigutilization",
|
38
|
+
"type" => "BIGNUMERIC",
|
39
|
+
"mode" => "NULLABLE"
|
35
40
|
}
|
36
41
|
]
|
37
42
|
end
|
@@ -64,15 +69,20 @@ class RecordSchemaTest < Test::Unit::TestCase
|
|
64
69
|
"mode" => "REPEATED"
|
65
70
|
},
|
66
71
|
{
|
67
|
-
"name" => "
|
72
|
+
"name" => "utilization",
|
68
73
|
"type" => "NUMERIC",
|
69
74
|
"mode" => "NULLABLE"
|
70
75
|
},
|
76
|
+
{
|
77
|
+
"name" => "bigutilization",
|
78
|
+
"type" => "BIGNUMERIC",
|
79
|
+
"mode" => "NULLABLE"
|
80
|
+
},
|
71
81
|
{
|
72
82
|
"name" => "new_column",
|
73
83
|
"type" => "STRING",
|
74
84
|
"mode" => "REQUIRED"
|
75
|
-
}
|
85
|
+
},
|
76
86
|
]
|
77
87
|
end
|
78
88
|
|
@@ -104,9 +114,14 @@ class RecordSchemaTest < Test::Unit::TestCase
|
|
104
114
|
"mode" => "REPEATED"
|
105
115
|
},
|
106
116
|
{
|
107
|
-
"name" => "
|
117
|
+
"name" => "utilization",
|
108
118
|
"type" => "NUMERIC",
|
109
119
|
"mode" => "NULLABLE"
|
120
|
+
},
|
121
|
+
{
|
122
|
+
"name" => "bigutilization",
|
123
|
+
"type" => "BIGNUMERIC",
|
124
|
+
"mode" => "NULLABLE"
|
110
125
|
}
|
111
126
|
]
|
112
127
|
end
|
@@ -157,12 +172,12 @@ class RecordSchemaTest < Test::Unit::TestCase
|
|
157
172
|
time = Time.local(2016, 2, 7, 19, 0, 0).utc
|
158
173
|
|
159
174
|
formatted = fields.format_one({
|
160
|
-
"time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "
|
175
|
+
"time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilization" => "0.837", "bigutilization" => "0.837"
|
161
176
|
})
|
162
177
|
assert_equal(
|
163
178
|
formatted,
|
164
179
|
{
|
165
|
-
"time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "
|
180
|
+
"time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilization" => "0.837", "bigutilization" => "0.837"
|
166
181
|
}
|
167
182
|
)
|
168
183
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.0
|
4
|
+
version: 3.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naoya Ito
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2024-10-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -142,6 +142,7 @@ files:
|
|
142
142
|
- ".github/workflows/linux.yml"
|
143
143
|
- ".github/workflows/windows.yml"
|
144
144
|
- ".gitignore"
|
145
|
+
- CHANGELOG.md
|
145
146
|
- Gemfile
|
146
147
|
- LICENSE.txt
|
147
148
|
- README.md
|
@@ -190,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
190
191
|
- !ruby/object:Gem::Version
|
191
192
|
version: '0'
|
192
193
|
requirements: []
|
193
|
-
rubygems_version: 3.
|
194
|
+
rubygems_version: 3.5.11
|
194
195
|
signing_key:
|
195
196
|
specification_version: 4
|
196
197
|
summary: Fluentd plugin to store data on Google BigQuery
|