RubyGems - fluent-plugin-bigquery - Versions diffs - 3.1.0 → 3.2.0 - Mend

fluent-plugin-bigquery 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml +4 -4
data/.github/workflows/linux.yml +2 -2
data/.github/workflows/windows.yml +2 -2
data/.gitignore +1 -0
data/CHANGELOG.md +8 -0
data/README.md +20 -18
data/lib/fluent/plugin/bigquery/schema.rb +11 -0
data/lib/fluent/plugin/bigquery/version.rb +1 -1
data/lib/fluent/plugin/out_bigquery_base.rb +1 -1
data/test/plugin/test_record_schema.rb +21 -6
metadata +4 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 52e15b9cc1e5fba553895298e0e1a4510b2c3be0e333a8c9853ef8fb9a30e721
-  data.tar.gz: 9be1a5a48e75f63bd83c103111664690a7e8fa583dfda548a7da2dfd3437960f
+  metadata.gz: 762f6e4f78f96c8c6912dbdd5780aa9b79902bbe7ca2df7dee60dcc0897c0e80
+  data.tar.gz: ff7e0e61dddb066bdeced537521255743aa1b98cbc2e539a1e2076e4ef93d9ae
 SHA512:
-  metadata.gz: 8fd48a77fa9cf4b04706c4c3d041aa36ccc5011024fd6b37287c7ac661d0137458940e832410ae14a2385d77a0370908a22a6e856cbc9de4194da5a0866691ff
-  data.tar.gz: aff96e78358ced9a0a213739e8968bc4caa65afa1915ba4bc1a4660161978418ced12dbdec539ef960967f628c8893fd821db28ffe4e4401fe22010e200934ee
+  metadata.gz: 0e1bd8c1cfca5dd43bebb2e9b4e2b4e2630c9e6176e1a03606b3ac2c289f23e049d4b8333bc39fbbc0142cb4dc66de2437fdd1e46c2c69c283e6e8db895f7ca6
+  data.tar.gz: 0a55797f85a64d787020c443041fb3ee2f08525238122ab1ff51f901467c2a349fa3678726e1ac09e25bcb97b141c5168f66192bcd40319355e6dd212793db94

data/.github/workflows/linux.yml CHANGED Viewed

@@ -9,9 +9,10 @@ jobs:
       fail-fast: false
       matrix:
         ruby:
-          - 2.7
           - 3.0
           - 3.1
+          - 3.2
+          - 3.3
         os:
           - ubuntu-latest
     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
@@ -25,6 +26,5 @@ jobs:
         CI: true
       run: |
         ruby -v
-        gem install bundler rake
         bundle install --jobs 4 --retry 3
         bundle exec rake test

data/.github/workflows/windows.yml CHANGED Viewed

@@ -9,9 +9,10 @@ jobs:
       fail-fast: false
       matrix:
         ruby:
-          - 2.7
           - 3.0
           - 3.1
+          - 3.2
+          - 3.3
         os:
           - windows-latest
     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
@@ -25,6 +26,5 @@ jobs:
         CI: true
       run: |
         ruby -v
-        gem install bundler rake
         bundle install --jobs 4 --retry 3
         bundle exec rake test

data/.gitignore CHANGED Viewed

@@ -17,5 +17,6 @@ test/tmp
 test/version_tmp
 tmp
 script/
+.idea/
 fluentd-0.12

data/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,8 @@
+## [v3.1.0](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/compare/v3.0.1...v3.1.0) (2022-12-16)
+### Features
+* Support GEOGRAPHY type field ([#201](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/201)) ([734faa9](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/734faa9adb7cec1ed579fc6a0bd9ce72d48b82d0))
+* Support JSON type field ([#204](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/204)) ([ec62bfa](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/ec62bfa2f858feb440e8bb8e8f8d6b8689f709bb))

data/README.md CHANGED Viewed

@@ -30,7 +30,7 @@ If you use official alpine based fluentd docker image (https://github.com/fluent
 You need to install `bigdecimal` gem on your own dockerfile.
 Because alpine based image has only minimal ruby environment in order to reduce image size.
 And in most case, dependency to embedded gem is not written on gemspec.
-Because embbeded gem dependency sometimes restricts ruby environment.
+Because embedded gem dependency sometimes restricts ruby environment.
 ## Configuration
@@ -40,7 +40,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | name                                          | type          | required?                                    | placeholder? | default                    | description                                                                                            |
 | :-------------------------------------------- | :------------ | :-----------                                 | :----------  | :------------------------- | :-----------------------                                                                               |
-| auth_method                                   | enum          | yes                                          | no           | private_key                | `private_key` or `json_key` or `compute_engine` or `application_default`                               |
+| auth_method                                   | enum          | yes                                          | no           | private_key                | `private_key` or `json_key` or `compute_engine` or `application_default` (GKE Workload Identity)       |
 | email                                         | string        | yes (private_key)                            | no           | nil                        | GCP Service Account Email                                                                              |
 | private_key_path                              | string        | yes (private_key)                            | no           | nil                        | GCP Private Key file path                                                                              |
 | private_key_passphrase                        | string        | yes (private_key)                            | no           | nil                        | GCP Private Key Passphrase                                                                             |
@@ -59,7 +59,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | schema_cache_expire                           | integer       | no                                           | no           | 600                        | Value is second. If current time is after expiration interval, re-fetch table schema definition.       |
 | request_timeout_sec                           | integer       | no                                           | no           | nil                        | Bigquery API response timeout                                                                          |
 | request_open_timeout_sec                      | integer       | no                                           | no           | 60                         | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value.       |
-| time_partitioning_type                        | enum          | no (either day)                              | no           | nil                        | Type of bigquery time partitioning feature.                                                            |
+| time_partitioning_type                        | enum          | no (either day or hour)                      | no           | nil                        | Type of bigquery time partitioning feature.                                                            |
 | time_partitioning_field                       | string        | no                                           | no           | nil                        | Field used to determine how to create a time-based partition.                                          |
 | time_partitioning_expiration                  | time          | no                                           | no           | nil                        | Expiration milliseconds for bigquery time partitioning.                                                |
 | clustering_fields                             | array(string) | no                                           | no           | nil                        | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
@@ -194,15 +194,15 @@ For high rate inserts over streaming inserts, you should specify flush intervals
 ```apache
 <match dummy>
   @type bigquery_insert
   <buffer>
     flush_interval 0.1  # flush as frequent as possible
     total_limit_size 10g
     flush_thread_count 16
   </buffer>
   auth_method private_key   # default
   email xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxx@developer.gserviceaccount.com
   private_key_path /home/username/.keys/00000000000000000000000000000000-privatekey.p12
@@ -255,7 +255,7 @@ Important options for high rate events are:
     * threads for insert api calls in parallel
     * specify this option for 100 or more records per seconds
     * 10 or more threads seems good for inserts over internet
-    * less threads may be good for Google Compute Engine instances (with low latency for BigQuery)
+    * fewer threads may be good for Google Compute Engine instances (with low latency for BigQuery)
   * `buffer/flush_interval`
     * interval between data flushes (default 0.25)
     * you can set subsecond values such as `0.15` on Fluentd v0.10.42 or later
@@ -294,7 +294,7 @@ There are four methods supported to fetch access token for the service account.
 1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
 2. JSON key of GCP(Google Cloud Platform)'s service account
 3. Predefined access token (Compute Engine only)
-4. Google application default credentials (http://goo.gl/IUuyuX)
+4. [Google application default credentials](https://cloud.google.com/docs/authentication/application-default-credentials) / GKE Workload Identity
 #### Public-Private key pair of GCP's service account
@@ -339,7 +339,7 @@ You need to only include `private_key` and `client_email` key from JSON key file
 #### Predefined access token (Compute Engine only)
-When you run fluentd on Googlce Compute Engine instance,
+When you run fluentd on Google Compute Engine instance,
 you don't need to explicitly create a service account for fluentd.
 In this authentication method, you need to add the API scope "https://www.googleapis.com/auth/bigquery" to the scope list of your
 Compute Engine instance, then you can configure fluentd like this.
@@ -360,14 +360,16 @@ Compute Engine instance, then you can configure fluentd like this.
 #### Application default credentials
-The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at http://goo.gl/IUuyuX.
+The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at https://cloud.google.com/docs/authentication/application-default-credentials.
+**This is the method you should choose if you want to use Workload Identity on GKE**.
 In this authentication method, the credentials returned are determined by the environment the code is running in. Conditions are checked in the following order:credentials are get from following order.
 1. The environment variable `GOOGLE_APPLICATION_CREDENTIALS` is checked. If this variable is specified it should point to a JSON key file that defines the credentials.
-2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If this variables are specified `GOOGLE_PRIVATE_KEY` should point to `private_key`, `GOOGLE_CLIENT_EMAIL` should point to `client_email` in a JSON key.
-3. Well known path is checked. If file is exists, the file used as a JSON key file. This path is `$HOME/.config/gcloud/application_default_credentials.json`.
-4. System default path is checked. If file is exists, the file used as a JSON key file. This path is `/etc/google/auth/application_default_credentials.json`.
+2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If these variables are specified `GOOGLE_PRIVATE_KEY` should point to `private_key`, `GOOGLE_CLIENT_EMAIL` should point to `client_email` in a JSON key.
+3. Well known path is checked. If the file exists, it is used as a JSON key file. This path is `$HOME/.config/gcloud/application_default_credentials.json`.
+4. System default path is checked. If the file exists, it is used as a JSON key file. This path is `/etc/google/auth/application_default_credentials.json`.
 5. If you are running in Google Compute Engine production, the built-in service account associated with the virtual machine instance will be used.
 6. If none of these conditions is true, an error will occur.
@@ -543,11 +545,11 @@ The second method is to specify a path to a BigQuery schema file instead of list
   @type bigquery_insert
   ...
   schema_path /path/to/httpd.schema
 </match>
 ```
-where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full flexbility.
+where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full flexibility.
 The third method is to set `fetch_schema` to `true` to enable fetch a schema using BigQuery API.  In this case, your fluent.conf looks like:
@@ -556,7 +558,7 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
   @type bigquery_insert
   ...
   fetch_schema true
   # fetch_schema_table other_table # if you want to fetch schema from other table
 </match>
@@ -594,5 +596,5 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p
 ## Authors
 * @tagomoris: First author, original version
-* KAIZEN platform Inc.: Maintener, Since 2014.08.19
+* KAIZEN platform Inc.: Maintainer, Since 2014.08.19
 * @joker1007

data/lib/fluent/plugin/bigquery/schema.rb CHANGED Viewed

@@ -116,6 +116,16 @@ module Fluent
       end
     end
+    class BigNumericFieldSchema < FieldSchema
+      def type
+        :bignumeric
+      end
+      def format_one(value, is_load: false)
+        value.to_s
+      end
+    end
     class BooleanFieldSchema < FieldSchema
       def type
         :boolean
@@ -200,6 +210,7 @@ module Fluent
         integer: IntegerFieldSchema,
         float: FloatFieldSchema,
         numeric: NumericFieldSchema,
+        bignumeric: BigNumericFieldSchema,
         boolean: BooleanFieldSchema,
         timestamp: TimestampFieldSchema,
         date: DateFieldSchema,

data/lib/fluent/plugin/bigquery/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Fluent
   module BigQueryPlugin
-    VERSION = "3.1.0".freeze
+    VERSION = "3.2.0".freeze
   end
 end

data/lib/fluent/plugin/out_bigquery_base.rb CHANGED Viewed

@@ -69,7 +69,7 @@ module Fluent
       config_param :request_open_timeout_sec, :time, default: 60
       ## Partitioning
-      config_param :time_partitioning_type, :enum, list: [:day], default: nil
+      config_param :time_partitioning_type, :enum, list: [:day, :hour], default: nil
       config_param :time_partitioning_field, :string, default: nil
       config_param :time_partitioning_expiration, :time, default: nil

data/test/plugin/test_record_schema.rb CHANGED Viewed

@@ -29,9 +29,14 @@ class RecordSchemaTest < Test::Unit::TestCase
         "mode" => "REPEATED"
       },
       {
-        "name" => "utilisation",
+        "name" => "utilization",
         "type" => "NUMERIC",
         "mode" => "NULLABLE"
+      },
+      {
+        "name" => "bigutilization",
+        "type" => "BIGNUMERIC",
+        "mode" => "NULLABLE"
       }
     ]
   end
@@ -64,15 +69,20 @@ class RecordSchemaTest < Test::Unit::TestCase
         "mode" => "REPEATED"
       },
       {
-        "name" => "utilisation",
+        "name" => "utilization",
         "type" => "NUMERIC",
         "mode" => "NULLABLE"
       },
+      {
+        "name" => "bigutilization",
+        "type" => "BIGNUMERIC",
+        "mode" => "NULLABLE"
+      },
       {
         "name" => "new_column",
         "type" => "STRING",
         "mode" => "REQUIRED"
-      }
+      },
     ]
   end
@@ -104,9 +114,14 @@ class RecordSchemaTest < Test::Unit::TestCase
         "mode" => "REPEATED"
       },
       {
-        "name" => "utilisation",
+        "name" => "utilization",
         "type" => "NUMERIC",
         "mode" => "NULLABLE"
+      },
+      {
+        "name" => "bigutilization",
+        "type" => "BIGNUMERIC",
+        "mode" => "NULLABLE"
       }
     ]
   end
@@ -157,12 +172,12 @@ class RecordSchemaTest < Test::Unit::TestCase
     time = Time.local(2016, 2, 7, 19, 0, 0).utc
     formatted = fields.format_one({
-      "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilisation" => "0.837"
+      "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilization" => "0.837", "bigutilization" => "0.837"
     })
     assert_equal(
       formatted,
       {
-        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilisation" => "0.837"
+        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilization" => "0.837", "bigutilization" => "0.837"
       }
     )
   end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 3.1.0
+  version: 3.2.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-12-16 00:00:00.000000000 Z
+date: 2024-10-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -142,6 +142,7 @@ files:
 - ".github/workflows/linux.yml"
 - ".github/workflows/windows.yml"
 - ".gitignore"
+- CHANGELOG.md
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -190,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.3.7
+rubygems_version: 3.5.11
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to store data on Google BigQuery