fluent-plugin-bigquery 3.0.1 → 3.2.0
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +2 -2
- data/.github/workflows/windows.yml +2 -2
- data/.gitignore +1 -0
- data/CHANGELOG.md +8 -0
- data/README.md +20 -18
- data/integration/fluent.conf +11 -1
- data/integration/schema.json +32 -0
- data/lib/fluent/plugin/bigquery/schema.rb +49 -16
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery_base.rb +2 -2
- data/lib/fluent/plugin/out_bigquery_insert.rb +1 -0
- data/lib/fluent/plugin/out_bigquery_load.rb +1 -0
- data/test/plugin/test_record_schema.rb +21 -6
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 762f6e4f78f96c8c6912dbdd5780aa9b79902bbe7ca2df7dee60dcc0897c0e80
+  data.tar.gz: ff7e0e61dddb066bdeced537521255743aa1b98cbc2e539a1e2076e4ef93d9ae
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0e1bd8c1cfca5dd43bebb2e9b4e2b4e2630c9e6176e1a03606b3ac2c289f23e049d4b8333bc39fbbc0142cb4dc66de2437fdd1e46c2c69c283e6e8db895f7ca6
+  data.tar.gz: 0a55797f85a64d787020c443041fb3ee2f08525238122ab1ff51f901467c2a349fa3678726e1ac09e25bcb97b141c5168f66192bcd40319355e6dd212793db94
data/.github/workflows/linux.yml
CHANGED
@@ -9,9 +9,10 @@ jobs:
       fail-fast: false
       matrix:
         ruby:
-          - 2.7
           - 3.0
           - 3.1
+          - 3.2
+          - 3.3
         os:
           - ubuntu-latest
     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
@@ -25,6 +26,5 @@ jobs:
         CI: true
       run: |
         ruby -v
-        gem install bundler rake
         bundle install --jobs 4 --retry 3
         bundle exec rake test
data/.github/workflows/windows.yml
CHANGED
@@ -9,9 +9,10 @@ jobs:
       fail-fast: false
       matrix:
         ruby:
-          - 2.7
           - 3.0
           - 3.1
+          - 3.2
+          - 3.3
         os:
           - windows-latest
     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
@@ -25,6 +26,5 @@ jobs:
         CI: true
       run: |
         ruby -v
-        gem install bundler rake
         bundle install --jobs 4 --retry 3
         bundle exec rake test
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,8 @@
+## [v3.1.0](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/compare/v3.0.1...v3.1.0) (2022-12-16)
+
+
+### Features
+
+* Support GEOGRAPHY type field ([#201](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/201)) ([734faa9](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/734faa9adb7cec1ed579fc6a0bd9ce72d48b82d0))
+* Support JSON type field ([#204](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/204)) ([ec62bfa](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/ec62bfa2f858feb440e8bb8e8f8d6b8689f709bb))
+
data/README.md
CHANGED
@@ -30,7 +30,7 @@ If you use official alpine based fluentd docker image (https://github.com/fluent
 You need to install `bigdecimal` gem on your own dockerfile.
 Because alpine based image has only minimal ruby environment in order to reduce image size.
 And in most case, dependency to embedded gem is not written on gemspec.
-Because embbeded gem dependency sometimes restricts ruby environment.
+Because embedded gem dependency sometimes restricts ruby environment.
 
 ## Configuration
 
@@ -40,7 +40,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 
 | name | type | required? | placeholder? | default | description |
 | :-------------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
-| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` (GKE Workload Identity) |
 | email | string | yes (private_key) | no | nil | GCP Service Account Email |
 | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
 | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
@@ -59,7 +59,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
 | request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
 | request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
-| time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature. |
+| time_partitioning_type | enum | no (either day or hour) | no | nil | Type of bigquery time partitioning feature. |
 | time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition. |
 | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. |
 | clustering_fields | array(string) | no | no | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
@@ -194,15 +194,15 @@ For high rate inserts over streaming inserts, you should specify flush intervals
 ```apache
 <match dummy>
   @type bigquery_insert
-
+
   <buffer>
     flush_interval 0.1 # flush as frequent as possible
-
+
     total_limit_size 10g
-
+
     flush_thread_count 16
   </buffer>
-
+
   auth_method private_key # default
   email xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxx@developer.gserviceaccount.com
   private_key_path /home/username/.keys/00000000000000000000000000000000-privatekey.p12
@@ -255,7 +255,7 @@ Important options for high rate events are:
   * threads for insert api calls in parallel
   * specify this option for 100 or more records per seconds
   * 10 or more threads seems good for inserts over internet
-  *
+  * fewer threads may be good for Google Compute Engine instances (with low latency for BigQuery)
 * `buffer/flush_interval`
   * interval between data flushes (default 0.25)
   * you can set subsecond values such as `0.15` on Fluentd v0.10.42 or later
|
@@ -294,7 +294,7 @@ There are four methods supported to fetch access token for the service account.
|
|
294
294
|
1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
|
295
295
|
2. JSON key of GCP(Google Cloud Platform)'s service account
|
296
296
|
3. Predefined access token (Compute Engine only)
|
297
|
-
4. Google application default credentials
|
297
|
+
4. [Google application default credentials](https://cloud.google.com/docs/authentication/application-default-credentials) / GKE Workload Identity
|
298
298
|
|
299
299
|
#### Public-Private key pair of GCP's service account
|
300
300
|
|
@@ -339,7 +339,7 @@ You need to only include `private_key` and `client_email` key from JSON key file
 
 #### Predefined access token (Compute Engine only)
 
-When you run fluentd on
+When you run fluentd on Google Compute Engine instance,
 you don't need to explicitly create a service account for fluentd.
 In this authentication method, you need to add the API scope "https://www.googleapis.com/auth/bigquery" to the scope list of your
 Compute Engine instance, then you can configure fluentd like this.
@@ -360,14 +360,16 @@ Compute Engine instance, then you can configure fluentd like this.
 
 #### Application default credentials
 
-The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at
+The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at https://cloud.google.com/docs/authentication/application-default-credentials.
+
+**This is the method you should choose if you want to use Workload Identity on GKE**.
 
 In this authentication method, the credentials returned are determined by the environment the code is running in. Conditions are checked in the following order:
 
 1. The environment variable `GOOGLE_APPLICATION_CREDENTIALS` is checked. If this variable is specified it should point to a JSON key file that defines the credentials.
-2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If
-3. Well known path is checked. If file
-4. System default path is checked. If file
+2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If these variables are specified `GOOGLE_PRIVATE_KEY` should point to `private_key`, `GOOGLE_CLIENT_EMAIL` should point to `client_email` in a JSON key.
+3. Well known path is checked. If the file exists, it is used as a JSON key file. This path is `$HOME/.config/gcloud/application_default_credentials.json`.
+4. System default path is checked. If the file exists, it is used as a JSON key file. This path is `/etc/google/auth/application_default_credentials.json`.
 5. If you are running in Google Compute Engine production, the built-in service account associated with the virtual machine instance will be used.
 6. If none of these conditions is true, an error will occur.
 
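Since the lookup order above is the substance of this README change, here is a minimal sketch of that order as plain Ruby. This is an illustration only, not the plugin's actual implementation; the helper name is hypothetical.

```ruby
# Hypothetical helper mirroring the documented ADC lookup order; illustration only.
def locate_application_default_credentials
  # 1. Explicit JSON key file.
  return ENV["GOOGLE_APPLICATION_CREDENTIALS"] if ENV["GOOGLE_APPLICATION_CREDENTIALS"]

  # 2. Key material passed directly via environment variables.
  if ENV["GOOGLE_PRIVATE_KEY"] && ENV["GOOGLE_CLIENT_EMAIL"]
    return { "private_key" => ENV["GOOGLE_PRIVATE_KEY"], "client_email" => ENV["GOOGLE_CLIENT_EMAIL"] }
  end

  # 3. Well known path (written by `gcloud auth application-default login`).
  well_known = File.join(Dir.home, ".config", "gcloud", "application_default_credentials.json")
  return well_known if File.exist?(well_known)

  # 4. System default path.
  system_default = "/etc/google/auth/application_default_credentials.json"
  return system_default if File.exist?(system_default)

  # 5. On GCE (or GKE with Workload Identity) the instance's built-in service
  #    account is used via the metadata server; 6. otherwise it is an error.
  raise "no application default credentials found"
end
```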
@@ -543,11 +545,11 @@ The second method is to specify a path to a BigQuery schema file instead of list
   @type bigquery_insert
 
   ...
-
+
   schema_path /path/to/httpd.schema
 </match>
 ```
-where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full
+where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full flexibility.
 
 The third method is to set `fetch_schema` to `true` to enable fetch a schema using BigQuery API. In this case, your fluent.conf looks like:
 
@@ -556,7 +558,7 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
   @type bigquery_insert
 
   ...
-
+
   fetch_schema true
   # fetch_schema_table other_table # if you want to fetch schema from other table
 </match>
@@ -594,5 +596,5 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p
 ## Authors
 
 * @tagomoris: First author, original version
-* KAIZEN platform Inc.:
+* KAIZEN platform Inc.: Maintainer, Since 2014.08.19
 * @joker1007
data/integration/fluent.conf
CHANGED
@@ -4,6 +4,14 @@
   bind 0.0.0.0
 </source>
 
+<source>
+  @type dummy
+  dummy {"json_field": {"foo": "val1", "bar": "val2", "hoge": 1}, "geography_field": {"type": "LineString", "coordinates": [[-118.4085, 33.9416], [-73.7781, 40.6413]]}, "timestamp_field": "2022-12-15T22:40:21+09:00", "date": "2022-12-15", "record_field": {"inner_field": "hoge", "inner_json": {"key1": "val1", "key2": "val2"}}, "repeated_string_field": ["a", "b", "c"]}
+  auto_increment_key id
+
+  tag insert_data
+</source>
+
 <match insert_data>
   @id bigquery-insert-integration
   @type bigquery_insert
@@ -21,7 +29,7 @@
     total_limit_size 1g
     path ./log/bigquery-insert-integration
 
-    flush_interval
+    flush_interval 15
     flush_thread_count 4
     flush_at_shutdown true
 
@@ -37,6 +45,7 @@
   dataset "#{ENV["DATASET_NAME"]}"
   table "#{ENV["TABLE_NAME"]}"
   auto_create_table false
+  # schema_path integration/schema.json
   fetch_schema true
   fetch_schema_table "#{ENV["TABLE_NAME"]}"
 
@@ -78,6 +87,7 @@
   dataset "#{ENV["DATASET_NAME"]}"
   table "#{ENV["TABLE_NAME"]}"
   auto_create_table false
+  # schema_path integration/schema.json
   fetch_schema true
   fetch_schema_table "#{ENV["TABLE_NAME"]}"
 
data/integration/schema.json
CHANGED
@@ -9,6 +9,16 @@
     "type": "STRING",
     "mode": "NULLABLE"
   },
+  {
+    "name": "json_field",
+    "type": "JSON",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "geography_field",
+    "type": "GEOGRAPHY",
+    "mode": "NULLABLE"
+  },
   {
     "name": "timestamp_field",
     "type": "TIMESTAMP",
@@ -18,5 +28,27 @@
     "name": "date",
     "type": "DATE",
     "mode": "REQUIRED"
+  },
+  {
+    "name": "record_field",
+    "type": "RECORD",
+    "mode": "NULLABLE",
+    "fields": [
+      {
+        "name": "inner_field",
+        "type": "STRING",
+        "mode": "REQUIRED"
+      },
+      {
+        "name": "inner_json",
+        "type": "JSON",
+        "mode": "REQUIRED"
+      }
+    ]
+  },
+  {
+    "name": "repeated_string_field",
+    "type": "STRING",
+    "mode": "REPEATED"
   }
 ]
data/lib/fluent/plugin/bigquery/schema.rb
CHANGED
@@ -23,23 +23,23 @@ module Fluent
 
       attr_reader :name, :mode
 
-      def format(value)
+      def format(value, is_load: false)
         case @mode
         when :nullable
-          format_one(value) unless value.nil?
+          format_one(value, is_load: is_load) unless value.nil?
         when :required
           if value.nil?
             log.warn "Required field #{name} cannot be null"
             nil
           else
-            format_one(value)
+            format_one(value, is_load: is_load)
           end
         when :repeated
-          value.nil? ? [] : value.each_with_object([]) { |v, arr| arr << format_one(v) if v }
+          value.nil? ? [] : value.each_with_object([]) { |v, arr| arr << format_one(v, is_load: true) if v }
         end
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         raise NotImplementedError, "Must implement in a subclass"
       end
 
@@ -57,7 +57,7 @@ module Fluent
         :string
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         if value.is_a?(Hash) || value.is_a?(Array)
           MultiJson.dump(value)
         else
@@ -66,12 +66,32 @@ module Fluent
         end
       end
 
+    class JsonFieldSchema < FieldSchema
+      def type
+        :json
+      end
+
+      def format_one(value, is_load: false)
+        if is_load
+          value
+        else
+          MultiJson.dump(value)
+        end
+      end
+    end
+
+    class GeographyFieldSchema < StringFieldSchema
+      def type
+        :geography
+      end
+    end
+
     class IntegerFieldSchema < FieldSchema
       def type
         :integer
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         value.to_i
       end
     end
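The new `is_load` flag is the crux of JSON support: the streaming insert API expects a JSON column as a serialized string, while a load job's newline-delimited payload can carry the object itself. A quick sketch of the class added above; the constructor arity (`name`, with mode defaulting to nullable) is assumed from the gem's source rather than shown in this diff.

```ruby
require "multi_json"
require "fluent/plugin/bigquery/schema" # path per the file list above

# Assumption: FieldSchema.new(name) defaults to :nullable mode.
field = Fluent::BigQuery::JsonFieldSchema.new("json_field")
value = { "foo" => "val1", "hoge" => 1 }

field.format(value)                # => "{\"foo\":\"val1\",\"hoge\":1}" -- string for streaming inserts
field.format(value, is_load: true) # => {"foo"=>"val1", "hoge"=>1}      -- raw object for load jobs
```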
@@ -81,7 +101,7 @@ module Fluent
        :float
      end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
        value.to_f
      end
    end
@@ -91,17 +111,27 @@ module Fluent
        :numeric
      end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
        value.to_s
      end
    end
 
+    class BigNumericFieldSchema < FieldSchema
+      def type
+        :bignumeric
+      end
+
+      def format_one(value, is_load: false)
+        value.to_s
+      end
+    end
+
    class BooleanFieldSchema < FieldSchema
      def type
        :boolean
      end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
        !!value
      end
    end
@@ -114,7 +144,7 @@ module Fluent
        :timestamp
      end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
        case value
        when Time
          value.strftime("%Y-%m-%d %H:%M:%S.%6L %:z")
@@ -137,7 +167,7 @@ module Fluent
        :date
      end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
        if value.respond_to?(:strftime)
          value.strftime("%Y-%m-%d")
        else
@@ -151,7 +181,7 @@ module Fluent
        :datetime
      end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
        if value.respond_to?(:strftime)
          value.strftime("%Y-%m-%dT%H:%M:%S.%6L")
        else
@@ -165,7 +195,7 @@ module Fluent
        :time
      end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
        if value.respond_to?(:strftime)
          value.strftime("%H:%M:%S.%6L")
        else
@@ -180,11 +210,14 @@ module Fluent
      integer: IntegerFieldSchema,
      float: FloatFieldSchema,
      numeric: NumericFieldSchema,
+      bignumeric: BigNumericFieldSchema,
      boolean: BooleanFieldSchema,
      timestamp: TimestampFieldSchema,
      date: DateFieldSchema,
      datetime: DateTimeFieldSchema,
      time: TimeFieldSchema,
+      json: JsonFieldSchema,
+      geography: GeographyFieldSchema,
      record: RecordSchema
    }.freeze
 
@@ -256,12 +289,12 @@ module Fluent
      end
    end
 
-    def format_one(record)
+    def format_one(record, is_load: false)
      out = {}
      record.each do |key, value|
        next if value.nil?
        schema = @fields[key]
-        out[key] = schema ? schema.format(value) : value
+        out[key] = schema ? schema.format(value, is_load: is_load) : value
      end
      out
    end
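End to end, these changes let one table schema serve both outputs. A hedged usage sketch follows; `RecordSchema.new(name)` and `#load_schema` are assumed from the gem's source (this diff only shows `format_one`), and the schema file is the integration fixture above.

```ruby
require "multi_json"
require "fluent/plugin/bigquery/schema"

# Assumed API: RecordSchema.new(name) plus #load_schema(fields), as in the gem's source.
table_schema = Fluent::BigQuery::RecordSchema.new("record")
table_schema.load_schema(MultiJson.load(File.read("integration/schema.json")))

record = { "json_field" => { "foo" => "val1" }, "repeated_string_field" => ["a", "b"] }

table_schema.format_one(record, is_load: false)
# => { "json_field" => "{\"foo\":\"val1\"}", "repeated_string_field" => ["a", "b"] }
table_schema.format_one(record, is_load: true)
# => { "json_field" => { "foo" => "val1" }, "repeated_string_field" => ["a", "b"] }
```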
data/lib/fluent/plugin/out_bigquery_base.rb
CHANGED
@@ -69,7 +69,7 @@ module Fluent
      config_param :request_open_timeout_sec, :time, default: 60
 
      ## Partitioning
-      config_param :time_partitioning_type, :enum, list: [:day], default: nil
+      config_param :time_partitioning_type, :enum, list: [:day, :hour], default: nil
      config_param :time_partitioning_field, :string, default: nil
      config_param :time_partitioning_expiration, :time, default: nil
 
@@ -172,7 +172,7 @@ module Fluent
      end
 
      begin
-        row = schema.format(record)
+        row = schema.format(record, is_load: !!@is_load)
        return if row.empty?
        @formatter.format(tag, time, row)
      rescue
data/lib/fluent/plugin/out_bigquery_load.rb
CHANGED
@@ -36,6 +36,7 @@ module Fluent
 
      def configure(conf)
        super
+        @is_load = true
 
        placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}"
        placeholder_validate!(:bigquery_load, placeholder_params)
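This is the only place `@is_load` is assigned in the expanded hunks, which explains the `!!@is_load` back in `out_bigquery_base.rb`: for any output class that never sets the ivar, `@is_load` is `nil`, and `!!` coerces that to `false`. A contrived sketch of the pattern (class names hypothetical):

```ruby
# Contrived illustration of the flag plumbing; class names are hypothetical.
class BaseOutput
  def format_row(schema, record)
    # @is_load is nil unless a subclass's #configure assigned it; !! gives a strict boolean.
    schema.format(record, is_load: !!@is_load)
  end
end

class LoadOutput < BaseOutput
  def configure
    @is_load = true # load jobs can pass raw JSON objects straight through
  end
end
```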
data/test/plugin/test_record_schema.rb
CHANGED
@@ -29,9 +29,14 @@ class RecordSchemaTest < Test::Unit::TestCase
        "mode" => "REPEATED"
      },
      {
-        "name" => "
+        "name" => "utilization",
        "type" => "NUMERIC",
        "mode" => "NULLABLE"
+      },
+      {
+        "name" => "bigutilization",
+        "type" => "BIGNUMERIC",
+        "mode" => "NULLABLE"
      }
    ]
  end
@@ -64,15 +69,20 @@ class RecordSchemaTest < Test::Unit::TestCase
        "mode" => "REPEATED"
      },
      {
-        "name" => "
+        "name" => "utilization",
        "type" => "NUMERIC",
        "mode" => "NULLABLE"
      },
+      {
+        "name" => "bigutilization",
+        "type" => "BIGNUMERIC",
+        "mode" => "NULLABLE"
+      },
      {
        "name" => "new_column",
        "type" => "STRING",
        "mode" => "REQUIRED"
-      }
+      },
    ]
  end
 
@@ -104,9 +114,14 @@ class RecordSchemaTest < Test::Unit::TestCase
        "mode" => "REPEATED"
      },
      {
-        "name" => "
+        "name" => "utilization",
        "type" => "NUMERIC",
        "mode" => "NULLABLE"
+      },
+      {
+        "name" => "bigutilization",
+        "type" => "BIGNUMERIC",
+        "mode" => "NULLABLE"
      }
    ]
  end
@@ -157,12 +172,12 @@ class RecordSchemaTest < Test::Unit::TestCase
    time = Time.local(2016, 2, 7, 19, 0, 0).utc
 
    formatted = fields.format_one({
-      "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "
+      "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilization" => "0.837", "bigutilization" => "0.837"
    })
    assert_equal(
      formatted,
      {
-        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "
+        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilization" => "0.837", "bigutilization" => "0.837"
      }
    )
  end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 3.0.1
+  version: 3.2.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2024-10-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -142,6 +142,7 @@ files:
 - ".github/workflows/linux.yml"
 - ".github/workflows/windows.yml"
 - ".gitignore"
+- CHANGELOG.md
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -190,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.5.11
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to store data on Google BigQuery