fluent-plugin-bigquery 3.1.0 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52e15b9cc1e5fba553895298e0e1a4510b2c3be0e333a8c9853ef8fb9a30e721
4
- data.tar.gz: 9be1a5a48e75f63bd83c103111664690a7e8fa583dfda548a7da2dfd3437960f
3
+ metadata.gz: 762f6e4f78f96c8c6912dbdd5780aa9b79902bbe7ca2df7dee60dcc0897c0e80
4
+ data.tar.gz: ff7e0e61dddb066bdeced537521255743aa1b98cbc2e539a1e2076e4ef93d9ae
5
5
  SHA512:
6
- metadata.gz: 8fd48a77fa9cf4b04706c4c3d041aa36ccc5011024fd6b37287c7ac661d0137458940e832410ae14a2385d77a0370908a22a6e856cbc9de4194da5a0866691ff
7
- data.tar.gz: aff96e78358ced9a0a213739e8968bc4caa65afa1915ba4bc1a4660161978418ced12dbdec539ef960967f628c8893fd821db28ffe4e4401fe22010e200934ee
6
+ metadata.gz: 0e1bd8c1cfca5dd43bebb2e9b4e2b4e2630c9e6176e1a03606b3ac2c289f23e049d4b8333bc39fbbc0142cb4dc66de2437fdd1e46c2c69c283e6e8db895f7ca6
7
+ data.tar.gz: 0a55797f85a64d787020c443041fb3ee2f08525238122ab1ff51f901467c2a349fa3678726e1ac09e25bcb97b141c5168f66192bcd40319355e6dd212793db94
@@ -9,9 +9,10 @@ jobs:
9
9
  fail-fast: false
10
10
  matrix:
11
11
  ruby:
12
- - 2.7
13
12
  - 3.0
14
13
  - 3.1
14
+ - 3.2
15
+ - 3.3
15
16
  os:
16
17
  - ubuntu-latest
17
18
  name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
@@ -25,6 +26,5 @@ jobs:
25
26
  CI: true
26
27
  run: |
27
28
  ruby -v
28
- gem install bundler rake
29
29
  bundle install --jobs 4 --retry 3
30
30
  bundle exec rake test
@@ -9,9 +9,10 @@ jobs:
9
9
  fail-fast: false
10
10
  matrix:
11
11
  ruby:
12
- - 2.7
13
12
  - 3.0
14
13
  - 3.1
14
+ - 3.2
15
+ - 3.3
15
16
  os:
16
17
  - windows-latest
17
18
  name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
@@ -25,6 +26,5 @@ jobs:
25
26
  CI: true
26
27
  run: |
27
28
  ruby -v
28
- gem install bundler rake
29
29
  bundle install --jobs 4 --retry 3
30
30
  bundle exec rake test
data/.gitignore CHANGED
@@ -17,5 +17,6 @@ test/tmp
17
17
  test/version_tmp
18
18
  tmp
19
19
  script/
20
+ .idea/
20
21
 
21
22
  fluentd-0.12
data/CHANGELOG.md ADDED
@@ -0,0 +1,8 @@
1
+ ## [v3.1.0](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/compare/v3.0.1...v3.1.0) (2022-12-16)
2
+
3
+
4
+ ### Features
5
+
6
+ * Support GEOGRAPHY type field ([#201](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/201)) ([734faa9](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/734faa9adb7cec1ed579fc6a0bd9ce72d48b82d0))
7
+ * Support JSON type field ([#204](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/204)) ([ec62bfa](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/ec62bfa2f858feb440e8bb8e8f8d6b8689f709bb))
8
+
data/README.md CHANGED
@@ -30,7 +30,7 @@ If you use official alpine based fluentd docker image (https://github.com/fluent
30
30
  You need to install `bigdecimal` gem on your own dockerfile.
31
31
  Because alpine based image has only minimal ruby environment in order to reduce image size.
32
32
  And in most cases, a dependency on an embedded gem is not written in the gemspec.
33
- Because embbeded gem dependency sometimes restricts ruby environment.
33
+ Because embedded gem dependency sometimes restricts ruby environment.
34
34
 
35
35
  ## Configuration
36
36
 
@@ -40,7 +40,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
40
40
 
41
41
  | name | type | required? | placeholder? | default | description |
42
42
  | :-------------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
43
- | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
43
+ | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` (GKE Workload Identity) |
44
44
  | email | string | yes (private_key) | no | nil | GCP Service Account Email |
45
45
  | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
46
46
  | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
@@ -59,7 +59,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
59
59
  | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
60
60
  | request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
61
61
  | request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
62
- | time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature. |
62
+ | time_partitioning_type | enum | no (either day or hour) | no | nil | Type of bigquery time partitioning feature. |
63
63
  | time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition. |
64
64
  | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. |
65
65
  | clustering_fields | array(string) | no | no | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
@@ -194,15 +194,15 @@ For high rate inserts over streaming inserts, you should specify flush intervals
194
194
  ```apache
195
195
  <match dummy>
196
196
  @type bigquery_insert
197
-
197
+
198
198
  <buffer>
199
199
  flush_interval 0.1 # flush as frequent as possible
200
-
200
+
201
201
  total_limit_size 10g
202
-
202
+
203
203
  flush_thread_count 16
204
204
  </buffer>
205
-
205
+
206
206
  auth_method private_key # default
207
207
  email xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxx@developer.gserviceaccount.com
208
208
  private_key_path /home/username/.keys/00000000000000000000000000000000-privatekey.p12
@@ -255,7 +255,7 @@ Important options for high rate events are:
255
255
  * threads for insert api calls in parallel
256
256
  * specify this option for 100 or more records per second
257
257
  * 10 or more threads seems good for inserts over internet
258
- * less threads may be good for Google Compute Engine instances (with low latency for BigQuery)
258
+ * fewer threads may be good for Google Compute Engine instances (with low latency for BigQuery)
259
259
  * `buffer/flush_interval`
260
260
  * interval between data flushes (default 0.25)
261
261
  * you can set subsecond values such as `0.15` on Fluentd v0.10.42 or later
@@ -294,7 +294,7 @@ There are four methods supported to fetch access token for the service account.
294
294
  1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
295
295
  2. JSON key of GCP(Google Cloud Platform)'s service account
296
296
  3. Predefined access token (Compute Engine only)
297
- 4. Google application default credentials (http://goo.gl/IUuyuX)
297
+ 4. [Google application default credentials](https://cloud.google.com/docs/authentication/application-default-credentials) / GKE Workload Identity
298
298
 
299
299
  #### Public-Private key pair of GCP's service account
300
300
 
@@ -339,7 +339,7 @@ You need to only include `private_key` and `client_email` key from JSON key file
339
339
 
340
340
  #### Predefined access token (Compute Engine only)
341
341
 
342
- When you run fluentd on Googlce Compute Engine instance,
342
+ When you run fluentd on Google Compute Engine instance,
343
343
  you don't need to explicitly create a service account for fluentd.
344
344
  In this authentication method, you need to add the API scope "https://www.googleapis.com/auth/bigquery" to the scope list of your
345
345
  Compute Engine instance, then you can configure fluentd like this.
@@ -360,14 +360,16 @@ Compute Engine instance, then you can configure fluentd like this.
360
360
 
361
361
  #### Application default credentials
362
362
 
363
- The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at http://goo.gl/IUuyuX.
363
+ The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at https://cloud.google.com/docs/authentication/application-default-credentials.
364
+
365
+ **This is the method you should choose if you want to use Workload Identity on GKE**.
364
366
 
365
367
  In this authentication method, the credentials returned are determined by the environment the code is running in. Conditions are checked in the following order:
366
368
 
367
369
  1. The environment variable `GOOGLE_APPLICATION_CREDENTIALS` is checked. If this variable is specified it should point to a JSON key file that defines the credentials.
368
- 2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If this variables are specified `GOOGLE_PRIVATE_KEY` should point to `private_key`, `GOOGLE_CLIENT_EMAIL` should point to `client_email` in a JSON key.
369
- 3. Well known path is checked. If file is exists, the file used as a JSON key file. This path is `$HOME/.config/gcloud/application_default_credentials.json`.
370
- 4. System default path is checked. If file is exists, the file used as a JSON key file. This path is `/etc/google/auth/application_default_credentials.json`.
370
+ 2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If these variables are specified `GOOGLE_PRIVATE_KEY` should point to `private_key`, `GOOGLE_CLIENT_EMAIL` should point to `client_email` in a JSON key.
371
+ 3. Well known path is checked. If the file exists, it is used as a JSON key file. This path is `$HOME/.config/gcloud/application_default_credentials.json`.
372
+ 4. System default path is checked. If the file exists, it is used as a JSON key file. This path is `/etc/google/auth/application_default_credentials.json`.
371
373
  5. If you are running in Google Compute Engine production, the built-in service account associated with the virtual machine instance will be used.
372
374
  6. If none of these conditions is true, an error will occur.
373
375
 
@@ -543,11 +545,11 @@ The second method is to specify a path to a BigQuery schema file instead of list
543
545
  @type bigquery_insert
544
546
 
545
547
  ...
546
-
548
+
547
549
  schema_path /path/to/httpd.schema
548
550
  </match>
549
551
  ```
550
- where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full flexbility.
552
+ where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full flexibility.
551
553
 
552
554
  The third method is to set `fetch_schema` to `true` to enable fetching a schema using the BigQuery API. In this case, your fluent.conf looks like:
553
555
 
@@ -556,7 +558,7 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
556
558
  @type bigquery_insert
557
559
 
558
560
  ...
559
-
561
+
560
562
  fetch_schema true
561
563
  # fetch_schema_table other_table # if you want to fetch schema from other table
562
564
  </match>
@@ -594,5 +596,5 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p
594
596
  ## Authors
595
597
 
596
598
  * @tagomoris: First author, original version
597
- * KAIZEN platform Inc.: Maintener, Since 2014.08.19
599
+ * KAIZEN platform Inc.: Maintainer, Since 2014.08.19
598
600
  * @joker1007
@@ -116,6 +116,16 @@ module Fluent
116
116
  end
117
117
  end
118
118
 
119
+ class BigNumericFieldSchema < FieldSchema
120
+ def type
121
+ :bignumeric
122
+ end
123
+
124
+ def format_one(value, is_load: false)
125
+ value.to_s
126
+ end
127
+ end
128
+
119
129
  class BooleanFieldSchema < FieldSchema
120
130
  def type
121
131
  :boolean
@@ -200,6 +210,7 @@ module Fluent
200
210
  integer: IntegerFieldSchema,
201
211
  float: FloatFieldSchema,
202
212
  numeric: NumericFieldSchema,
213
+ bignumeric: BigNumericFieldSchema,
203
214
  boolean: BooleanFieldSchema,
204
215
  timestamp: TimestampFieldSchema,
205
216
  date: DateFieldSchema,
@@ -1,5 +1,5 @@
1
1
  module Fluent
2
2
  module BigQueryPlugin
3
- VERSION = "3.1.0".freeze
3
+ VERSION = "3.2.0".freeze
4
4
  end
5
5
  end
@@ -69,7 +69,7 @@ module Fluent
69
69
  config_param :request_open_timeout_sec, :time, default: 60
70
70
 
71
71
  ## Partitioning
72
- config_param :time_partitioning_type, :enum, list: [:day], default: nil
72
+ config_param :time_partitioning_type, :enum, list: [:day, :hour], default: nil
73
73
  config_param :time_partitioning_field, :string, default: nil
74
74
  config_param :time_partitioning_expiration, :time, default: nil
75
75
 
@@ -29,9 +29,14 @@ class RecordSchemaTest < Test::Unit::TestCase
29
29
  "mode" => "REPEATED"
30
30
  },
31
31
  {
32
- "name" => "utilisation",
32
+ "name" => "utilization",
33
33
  "type" => "NUMERIC",
34
34
  "mode" => "NULLABLE"
35
+ },
36
+ {
37
+ "name" => "bigutilization",
38
+ "type" => "BIGNUMERIC",
39
+ "mode" => "NULLABLE"
35
40
  }
36
41
  ]
37
42
  end
@@ -64,15 +69,20 @@ class RecordSchemaTest < Test::Unit::TestCase
64
69
  "mode" => "REPEATED"
65
70
  },
66
71
  {
67
- "name" => "utilisation",
72
+ "name" => "utilization",
68
73
  "type" => "NUMERIC",
69
74
  "mode" => "NULLABLE"
70
75
  },
76
+ {
77
+ "name" => "bigutilization",
78
+ "type" => "BIGNUMERIC",
79
+ "mode" => "NULLABLE"
80
+ },
71
81
  {
72
82
  "name" => "new_column",
73
83
  "type" => "STRING",
74
84
  "mode" => "REQUIRED"
75
- }
85
+ },
76
86
  ]
77
87
  end
78
88
 
@@ -104,9 +114,14 @@ class RecordSchemaTest < Test::Unit::TestCase
104
114
  "mode" => "REPEATED"
105
115
  },
106
116
  {
107
- "name" => "utilisation",
117
+ "name" => "utilization",
108
118
  "type" => "NUMERIC",
109
119
  "mode" => "NULLABLE"
120
+ },
121
+ {
122
+ "name" => "bigutilization",
123
+ "type" => "BIGNUMERIC",
124
+ "mode" => "NULLABLE"
110
125
  }
111
126
  ]
112
127
  end
@@ -157,12 +172,12 @@ class RecordSchemaTest < Test::Unit::TestCase
157
172
  time = Time.local(2016, 2, 7, 19, 0, 0).utc
158
173
 
159
174
  formatted = fields.format_one({
160
- "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilisation" => "0.837"
175
+ "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilization" => "0.837", "bigutilization" => "0.837"
161
176
  })
162
177
  assert_equal(
163
178
  formatted,
164
179
  {
165
- "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilisation" => "0.837"
180
+ "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilization" => "0.837", "bigutilization" => "0.837"
166
181
  }
167
182
  )
168
183
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 3.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-12-16 00:00:00.000000000 Z
12
+ date: 2024-10-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -142,6 +142,7 @@ files:
142
142
  - ".github/workflows/linux.yml"
143
143
  - ".github/workflows/windows.yml"
144
144
  - ".gitignore"
145
+ - CHANGELOG.md
145
146
  - Gemfile
146
147
  - LICENSE.txt
147
148
  - README.md
@@ -190,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
190
191
  - !ruby/object:Gem::Version
191
192
  version: '0'
192
193
  requirements: []
193
- rubygems_version: 3.3.7
194
+ rubygems_version: 3.5.11
194
195
  signing_key:
195
196
  specification_version: 4
196
197
  summary: Fluentd plugin to store data on Google BigQuery