fluent-plugin-bigquery 3.0.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +2 -2
- data/.github/workflows/windows.yml +2 -2
- data/.gitignore +1 -0
- data/CHANGELOG.md +8 -0
- data/README.md +20 -18
- data/integration/fluent.conf +11 -1
- data/integration/schema.json +32 -0
- data/lib/fluent/plugin/bigquery/schema.rb +49 -16
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery_base.rb +2 -2
- data/lib/fluent/plugin/out_bigquery_insert.rb +1 -0
- data/lib/fluent/plugin/out_bigquery_load.rb +1 -0
- data/test/plugin/test_record_schema.rb +21 -6
- metadata +4 -3
checksums.yaml CHANGED
````diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 762f6e4f78f96c8c6912dbdd5780aa9b79902bbe7ca2df7dee60dcc0897c0e80
+  data.tar.gz: ff7e0e61dddb066bdeced537521255743aa1b98cbc2e539a1e2076e4ef93d9ae
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0e1bd8c1cfca5dd43bebb2e9b4e2b4e2630c9e6176e1a03606b3ac2c289f23e049d4b8333bc39fbbc0142cb4dc66de2437fdd1e46c2c69c283e6e8db895f7ca6
+  data.tar.gz: 0a55797f85a64d787020c443041fb3ee2f08525238122ab1ff51f901467c2a349fa3678726e1ac09e25bcb97b141c5168f66192bcd40319355e6dd212793db94
````
data/.github/workflows/linux.yml CHANGED
````diff
@@ -9,9 +9,10 @@ jobs:
       fail-fast: false
       matrix:
         ruby:
-          - 2.7
           - 3.0
           - 3.1
+          - 3.2
+          - 3.3
         os:
           - ubuntu-latest
     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
@@ -25,6 +26,5 @@ jobs:
         CI: true
       run: |
         ruby -v
-        gem install bundler rake
         bundle install --jobs 4 --retry 3
        bundle exec rake test
````

data/.github/workflows/windows.yml CHANGED
````diff
@@ -9,9 +9,10 @@ jobs:
       fail-fast: false
       matrix:
         ruby:
-          - 2.7
           - 3.0
           - 3.1
+          - 3.2
+          - 3.3
         os:
           - windows-latest
     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
@@ -25,6 +26,5 @@ jobs:
         CI: true
       run: |
         ruby -v
-        gem install bundler rake
         bundle install --jobs 4 --retry 3
         bundle exec rake test
````
data/.gitignore CHANGED
data/CHANGELOG.md ADDED
````diff
@@ -0,0 +1,8 @@
+## [v3.1.0](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/compare/v3.0.1...v3.1.0) (2022-12-16)
+
+
+### Features
+
+* Support GEOGRAPHY type field ([#201](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/201)) ([734faa9](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/734faa9adb7cec1ed579fc6a0bd9ce72d48b82d0))
+* Support JSON type field ([#204](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/204)) ([ec62bfa](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/ec62bfa2f858feb440e8bb8e8f8d6b8689f709bb))
+
````
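Both features surface in `data/lib/fluent/plugin/bigquery/schema.rb` further down in this diff. As a quick orientation, here is a minimal sketch of what the plugin sends to BigQuery for the two new types — the values are borrowed from the integration config below, and the comments describe the `JsonFieldSchema`/`GeographyFieldSchema` behavior shown later:

```ruby
require "multi_json"

# JSON column: for streaming inserts the cell must be a JSON-encoded string,
# which is what JsonFieldSchema#format_one produces (load jobs keep the raw Hash).
json_value = { "foo" => "val1", "bar" => "val2", "hoge" => 1 }
MultiJson.dump(json_value)
# => '{"foo":"val1","bar":"val2","hoge":1}'

# GEOGRAPHY column: GeographyFieldSchema subclasses StringFieldSchema, so a
# GeoJSON Hash is likewise dumped to a string for BigQuery to parse.
geo_value = { "type" => "LineString",
              "coordinates" => [[-118.4085, 33.9416], [-73.7781, 40.6413]] }
MultiJson.dump(geo_value)
# => '{"type":"LineString","coordinates":[[-118.4085,33.9416],[-73.7781,40.6413]]}'
```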
data/README.md CHANGED
````diff
@@ -30,7 +30,7 @@ If you use official alpine based fluentd docker image (https://github.com/fluent
 You need to install `bigdecimal` gem on your own dockerfile.
 Because alpine based image has only minimal ruby environment in order to reduce image size.
 And in most case, dependency to embedded gem is not written on gemspec.
-Because
+Because embedded gem dependency sometimes restricts ruby environment.
 
 ## Configuration
 
@@ -40,7 +40,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 
 | name | type | required? | placeholder? | default | description |
 | :-------------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
-| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default`
+| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` (GKE Workload Identity) |
 | email | string | yes (private_key) | no | nil | GCP Service Account Email |
 | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
 | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
@@ -59,7 +59,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
 | request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
 | request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
-| time_partitioning_type | enum | no (either day)
+| time_partitioning_type | enum | no (either day or hour) | no | nil | Type of bigquery time partitioning feature. |
 | time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition. |
 | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. |
 | clustering_fields | array(string) | no | no | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
@@ -194,15 +194,15 @@ For high rate inserts over streaming inserts, you should specify flush intervals
 ```apache
 <match dummy>
   @type bigquery_insert
-
+
   <buffer>
     flush_interval 0.1 # flush as frequent as possible
-
+
     total_limit_size 10g
-
+
     flush_thread_count 16
   </buffer>
-
+
   auth_method private_key # default
   email xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxx@developer.gserviceaccount.com
   private_key_path /home/username/.keys/00000000000000000000000000000000-privatekey.p12
@@ -255,7 +255,7 @@ Important options for high rate events are:
   * threads for insert api calls in parallel
   * specify this option for 100 or more records per seconds
   * 10 or more threads seems good for inserts over internet
-  *
+  * fewer threads may be good for Google Compute Engine instances (with low latency for BigQuery)
 * `buffer/flush_interval`
   * interval between data flushes (default 0.25)
   * you can set subsecond values such as `0.15` on Fluentd v0.10.42 or later
@@ -294,7 +294,7 @@ There are four methods supported to fetch access token for the service account.
 1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
 2. JSON key of GCP(Google Cloud Platform)'s service account
 3. Predefined access token (Compute Engine only)
-4. Google application default credentials
+4. [Google application default credentials](https://cloud.google.com/docs/authentication/application-default-credentials) / GKE Workload Identity
 
 #### Public-Private key pair of GCP's service account
 
@@ -339,7 +339,7 @@ You need to only include `private_key` and `client_email` key from JSON key file
 
 #### Predefined access token (Compute Engine only)
 
-When you run fluentd on
+When you run fluentd on Google Compute Engine instance,
 you don't need to explicitly create a service account for fluentd.
 In this authentication method, you need to add the API scope "https://www.googleapis.com/auth/bigquery" to the scope list of your
 Compute Engine instance, then you can configure fluentd like this.
@@ -360,14 +360,16 @@ Compute Engine instance, then you can configure fluentd like this.
 
 #### Application default credentials
 
-The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at
+The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at https://cloud.google.com/docs/authentication/application-default-credentials.
+
+**This is the method you should choose if you want to use Workload Identity on GKE**.
 
 In this authentication method, the credentials returned are determined by the environment the code is running in. Conditions are checked in the following order:credentials are get from following order.
 
 1. The environment variable `GOOGLE_APPLICATION_CREDENTIALS` is checked. If this variable is specified it should point to a JSON key file that defines the credentials.
-2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If
-3. Well known path is checked. If file
-4. System default path is checked. If file
+2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If these variables are specified `GOOGLE_PRIVATE_KEY` should point to `private_key`, `GOOGLE_CLIENT_EMAIL` should point to `client_email` in a JSON key.
+3. Well known path is checked. If the file exists, it is used as a JSON key file. This path is `$HOME/.config/gcloud/application_default_credentials.json`.
+4. System default path is checked. If the file exists, it is used as a JSON key file. This path is `/etc/google/auth/application_default_credentials.json`.
 5. If you are running in Google Compute Engine production, the built-in service account associated with the virtual machine instance will be used.
 6. If none of these conditions is true, an error will occur.
 
````
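As a side note on the application-default-credentials hunk above: the documented lookup order boils down to roughly the following — an illustrative sketch only, since the actual resolution is performed by the googleauth gem, not by plugin code:

```ruby
# Illustrative sketch of the documented ADC lookup order; not the real
# googleauth implementation.
def application_default_credentials_source
  # 1. Explicit JSON key file named by the environment
  return ENV["GOOGLE_APPLICATION_CREDENTIALS"] if ENV["GOOGLE_APPLICATION_CREDENTIALS"]

  # 2. Key material passed directly through the environment
  return :env_key_pair if ENV["GOOGLE_PRIVATE_KEY"] && ENV["GOOGLE_CLIENT_EMAIL"]

  # 3. gcloud's well-known path
  well_known = File.join(Dir.home, ".config/gcloud/application_default_credentials.json")
  return well_known if File.exist?(well_known)

  # 4. System default path
  system_default = "/etc/google/auth/application_default_credentials.json"
  return system_default if File.exist?(system_default)

  # 5. On Compute Engine, the instance's built-in service account; otherwise an error
  :gce_builtin_service_account
end
```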
````diff
@@ -543,11 +545,11 @@ The second method is to specify a path to a BigQuery schema file instead of list
   @type bigquery_insert
 
   ...
-
+
   schema_path /path/to/httpd.schema
 </match>
 ```
-where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full
+where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full flexibility.
 
 The third method is to set `fetch_schema` to `true` to enable fetch a schema using BigQuery API. In this case, your fluent.conf looks like:
 
@@ -556,7 +558,7 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
   @type bigquery_insert
 
   ...
-
+
   fetch_schema true
   # fetch_schema_table other_table # if you want to fetch schema from other table
 </match>
@@ -594,5 +596,5 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p
 ## Authors
 
 * @tagomoris: First author, original version
-* KAIZEN platform Inc.:
+* KAIZEN platform Inc.: Maintainer, Since 2014.08.19
 * @joker1007
````
data/integration/fluent.conf CHANGED
````diff
@@ -4,6 +4,14 @@
   bind 0.0.0.0
 </source>
 
+<source>
+  @type dummy
+  dummy {"json_field": {"foo": "val1", "bar": "val2", "hoge": 1}, "geography_field": {"type": "LineString", "coordinates": [[-118.4085, 33.9416], [-73.7781, 40.6413]]}, "timestamp_field": "2022-12-15T22:40:21+09:00", "date": "2022-12-15", "record_field": {"inner_field": "hoge", "inner_json": {"key1": "val1", "key2": "val2"}}, "repeated_string_field": ["a", "b", "c"]}
+  auto_increment_key id
+
+  tag insert_data
+</source>
+
 <match insert_data>
   @id bigquery-insert-integration
   @type bigquery_insert
@@ -21,7 +29,7 @@
     total_limit_size 1g
     path ./log/bigquery-insert-integration
 
-    flush_interval
+    flush_interval 15
     flush_thread_count 4
     flush_at_shutdown true
 
@@ -37,6 +45,7 @@
   dataset "#{ENV["DATASET_NAME"]}"
   table "#{ENV["TABLE_NAME"]}"
   auto_create_table false
+  # schema_path integration/schema.json
   fetch_schema true
   fetch_schema_table "#{ENV["TABLE_NAME"]}"
 
@@ -78,6 +87,7 @@
   dataset "#{ENV["DATASET_NAME"]}"
   table "#{ENV["TABLE_NAME"]}"
   auto_create_table false
+  # schema_path integration/schema.json
   fetch_schema true
   fetch_schema_table "#{ENV["TABLE_NAME"]}"
 
````
data/integration/schema.json CHANGED
````diff
@@ -9,6 +9,16 @@
     "type": "STRING",
     "mode": "NULLABLE"
   },
+  {
+    "name": "json_field",
+    "type": "JSON",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "geography_field",
+    "type": "GEOGRAPHY",
+    "mode": "NULLABLE"
+  },
   {
     "name": "timestamp_field",
     "type": "TIMESTAMP",
@@ -18,5 +28,27 @@
     "name": "date",
     "type": "DATE",
     "mode": "REQUIRED"
+  },
+  {
+    "name": "record_field",
+    "type": "RECORD",
+    "mode": "NULLABLE",
+    "fields": [
+      {
+        "name": "inner_field",
+        "type": "STRING",
+        "mode": "REQUIRED"
+      },
+      {
+        "name": "inner_json",
+        "type": "JSON",
+        "mode": "REQUIRED"
+      }
+    ]
+  },
+  {
+    "name": "repeated_string_field",
+    "type": "STRING",
+    "mode": "REPEATED"
   }
 ]
````
data/lib/fluent/plugin/bigquery/schema.rb CHANGED
````diff
@@ -23,23 +23,23 @@ module Fluent
 
       attr_reader :name, :mode
 
-      def format(value)
+      def format(value, is_load: false)
         case @mode
         when :nullable
-          format_one(value) unless value.nil?
+          format_one(value, is_load: is_load) unless value.nil?
         when :required
           if value.nil?
             log.warn "Required field #{name} cannot be null"
             nil
           else
-            format_one(value)
+            format_one(value, is_load: is_load)
           end
         when :repeated
-          value.nil? ? [] : value.each_with_object([]) { |v, arr| arr << format_one(v) if v }
+          value.nil? ? [] : value.each_with_object([]) { |v, arr| arr << format_one(v, is_load: true) if v }
         end
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         raise NotImplementedError, "Must implement in a subclass"
       end
 
@@ -57,7 +57,7 @@ module Fluent
         :string
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         if value.is_a?(Hash) || value.is_a?(Array)
           MultiJson.dump(value)
         else
@@ -66,12 +66,32 @@ module Fluent
       end
     end
 
+    class JsonFieldSchema < FieldSchema
+      def type
+        :json
+      end
+
+      def format_one(value, is_load: false)
+        if is_load
+          value
+        else
+          MultiJson.dump(value)
+        end
+      end
+    end
+
+    class GeographyFieldSchema < StringFieldSchema
+      def type
+        :geography
+      end
+    end
+
     class IntegerFieldSchema < FieldSchema
       def type
         :integer
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         value.to_i
       end
     end
@@ -81,7 +101,7 @@ module Fluent
         :float
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         value.to_f
       end
     end
@@ -91,17 +111,27 @@ module Fluent
         :numeric
      end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         value.to_s
       end
     end
 
+    class BigNumericFieldSchema < FieldSchema
+      def type
+        :bignumeric
+      end
+
+      def format_one(value, is_load: false)
+        value.to_s
+      end
+    end
+
     class BooleanFieldSchema < FieldSchema
       def type
         :boolean
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         !!value
       end
     end
@@ -114,7 +144,7 @@ module Fluent
         :timestamp
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         case value
         when Time
           value.strftime("%Y-%m-%d %H:%M:%S.%6L %:z")
@@ -137,7 +167,7 @@ module Fluent
         :date
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         if value.respond_to?(:strftime)
           value.strftime("%Y-%m-%d")
         else
@@ -151,7 +181,7 @@ module Fluent
         :datetime
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         if value.respond_to?(:strftime)
           value.strftime("%Y-%m-%dT%H:%M:%S.%6L")
         else
@@ -165,7 +195,7 @@ module Fluent
         :time
       end
 
-      def format_one(value)
+      def format_one(value, is_load: false)
         if value.respond_to?(:strftime)
           value.strftime("%H:%M:%S.%6L")
         else
@@ -180,11 +210,14 @@ module Fluent
       integer: IntegerFieldSchema,
       float: FloatFieldSchema,
       numeric: NumericFieldSchema,
+      bignumeric: BigNumericFieldSchema,
       boolean: BooleanFieldSchema,
       timestamp: TimestampFieldSchema,
       date: DateFieldSchema,
       datetime: DateTimeFieldSchema,
       time: TimeFieldSchema,
+      json: JsonFieldSchema,
+      geography: GeographyFieldSchema,
       record: RecordSchema
     }.freeze
 
@@ -256,12 +289,12 @@ module Fluent
       end
     end
 
-    def format_one(record)
+    def format_one(record, is_load: false)
       out = {}
       record.each do |key, value|
         next if value.nil?
         schema = @fields[key]
-        out[key] = schema ? schema.format(value) : value
+        out[key] = schema ? schema.format(value, is_load: is_load) : value
       end
       out
     end
````
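Threading `is_load` through `format`/`format_one` is the mechanical bulk of this change; the behavioral difference only matters for `JsonFieldSchema`. A minimal usage sketch — assuming `RecordSchema#load_schema` accepts the same hash layout used in the tests at the end of this diff:

```ruby
require "fluent/plugin/bigquery/schema"

schema = Fluent::BigQuery::RecordSchema.new("record")
schema.load_schema([
  { "name" => "json_field", "type" => "JSON", "mode" => "NULLABLE" }
])

record = { "json_field" => { "foo" => "val1" } }

# Streaming-insert path: the JSON cell is serialized to a string.
schema.format_one(record, is_load: false)
# => { "json_field" => "{\"foo\":\"val1\"}" }

# Load-job path: the Hash passes through untouched.
schema.format_one(record, is_load: true)
# => { "json_field" => { "foo" => "val1" } }
```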
data/lib/fluent/plugin/out_bigquery_base.rb CHANGED
````diff
@@ -69,7 +69,7 @@ module Fluent
       config_param :request_open_timeout_sec, :time, default: 60
 
       ## Partitioning
-      config_param :time_partitioning_type, :enum, list: [:day], default: nil
+      config_param :time_partitioning_type, :enum, list: [:day, :hour], default: nil
       config_param :time_partitioning_field, :string, default: nil
       config_param :time_partitioning_expiration, :time, default: nil
 
@@ -172,7 +172,7 @@ module Fluent
        end
 
        begin
-          row = schema.format(record)
+          row = schema.format(record, is_load: !!@is_load)
          return if row.empty?
          @formatter.format(tag, time, row)
        rescue
````

data/lib/fluent/plugin/out_bigquery_load.rb CHANGED
````diff
@@ -36,6 +36,7 @@ module Fluent
 
       def configure(conf)
         super
+        @is_load = true
 
         placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}"
         placeholder_validate!(:bigquery_load, placeholder_params)
````
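The `@is_load = true` set here is what `out_bigquery_base` forwards into `schema.format` above, and it explains the `JsonFieldSchema` passthrough: a load job serializes each buffered row as one line of newline-delimited JSON (my reading of the load path; the framing below is an assumption, not code from this diff), so a JSON column can stay a nested object, whereas a streaming insert needs the column pre-encoded as a string:

```ruby
require "multi_json"

row = { "json_field" => { "foo" => "val1" } }

# Load job (is_load: true): the Hash nests naturally inside the NDJSON line.
MultiJson.dump(row)
# => '{"json_field":{"foo":"val1"}}'

# Streaming insert (is_load: false): JsonFieldSchema has already turned the
# cell into a JSON string, so it travels as an encoded string value.
MultiJson.dump("json_field" => MultiJson.dump(row["json_field"]))
# => '{"json_field":"{\"foo\":\"val1\"}"}'
```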
data/test/plugin/test_record_schema.rb CHANGED
````diff
@@ -29,9 +29,14 @@ class RecordSchemaTest < Test::Unit::TestCase
       "mode" => "REPEATED"
     },
     {
-      "name" => "
+      "name" => "utilization",
       "type" => "NUMERIC",
       "mode" => "NULLABLE"
+    },
+    {
+      "name" => "bigutilization",
+      "type" => "BIGNUMERIC",
+      "mode" => "NULLABLE"
     }
   ]
 end
@@ -64,15 +69,20 @@ class RecordSchemaTest < Test::Unit::TestCase
       "mode" => "REPEATED"
     },
     {
-      "name" => "
+      "name" => "utilization",
       "type" => "NUMERIC",
       "mode" => "NULLABLE"
     },
+    {
+      "name" => "bigutilization",
+      "type" => "BIGNUMERIC",
+      "mode" => "NULLABLE"
+    },
     {
       "name" => "new_column",
       "type" => "STRING",
       "mode" => "REQUIRED"
-    }
+    },
   ]
 end
 
@@ -104,9 +114,14 @@ class RecordSchemaTest < Test::Unit::TestCase
       "mode" => "REPEATED"
     },
     {
-      "name" => "
+      "name" => "utilization",
       "type" => "NUMERIC",
       "mode" => "NULLABLE"
+    },
+    {
+      "name" => "bigutilization",
+      "type" => "BIGNUMERIC",
+      "mode" => "NULLABLE"
     }
   ]
 end
@@ -157,12 +172,12 @@ class RecordSchemaTest < Test::Unit::TestCase
     time = Time.local(2016, 2, 7, 19, 0, 0).utc
 
     formatted = fields.format_one({
-      "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "
+      "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilization" => "0.837", "bigutilization" => "0.837"
     })
     assert_equal(
       formatted,
       {
-        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "
+        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilization" => "0.837", "bigutilization" => "0.837"
       }
     )
   end
````
metadata CHANGED
````diff
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 3.0.1
+  version: 3.2.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2024-10-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -142,6 +142,7 @@ files:
 - ".github/workflows/linux.yml"
 - ".github/workflows/windows.yml"
 - ".gitignore"
+- CHANGELOG.md
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -190,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.5.11
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to store data on Google BigQuery
````