fluent-plugin-bigquery 3.0.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +2 -2
- data/.github/workflows/windows.yml +2 -2
- data/.gitignore +1 -0
- data/CHANGELOG.md +8 -0
- data/README.md +20 -18
- data/integration/fluent.conf +11 -1
- data/integration/schema.json +32 -0
- data/lib/fluent/plugin/bigquery/schema.rb +49 -16
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery_base.rb +2 -2
- data/lib/fluent/plugin/out_bigquery_insert.rb +1 -0
- data/lib/fluent/plugin/out_bigquery_load.rb +1 -0
- data/test/plugin/test_record_schema.rb +21 -6
- metadata +4 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 762f6e4f78f96c8c6912dbdd5780aa9b79902bbe7ca2df7dee60dcc0897c0e80
         | 
| 4 | 
            +
              data.tar.gz: ff7e0e61dddb066bdeced537521255743aa1b98cbc2e539a1e2076e4ef93d9ae
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 0e1bd8c1cfca5dd43bebb2e9b4e2b4e2630c9e6176e1a03606b3ac2c289f23e049d4b8333bc39fbbc0142cb4dc66de2437fdd1e46c2c69c283e6e8db895f7ca6
         | 
| 7 | 
            +
              data.tar.gz: 0a55797f85a64d787020c443041fb3ee2f08525238122ab1ff51f901467c2a349fa3678726e1ac09e25bcb97b141c5168f66192bcd40319355e6dd212793db94
         | 
    
        data/.github/workflows/linux.yml
    CHANGED
    
    | @@ -9,9 +9,10 @@ jobs: | |
| 9 9 | 
             
                  fail-fast: false
         | 
| 10 10 | 
             
                  matrix:
         | 
| 11 11 | 
             
                    ruby:
         | 
| 12 | 
            -
                      - 2.7
         | 
| 13 12 | 
             
                      - 3.0
         | 
| 14 13 | 
             
                      - 3.1
         | 
| 14 | 
            +
                      - 3.2
         | 
| 15 | 
            +
                      - 3.3
         | 
| 15 16 | 
             
                    os:
         | 
| 16 17 | 
             
                      - ubuntu-latest
         | 
| 17 18 | 
             
                name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
         | 
| @@ -25,6 +26,5 @@ jobs: | |
| 25 26 | 
             
                    CI: true
         | 
| 26 27 | 
             
                  run: |
         | 
| 27 28 | 
             
                    ruby -v
         | 
| 28 | 
            -
                    gem install bundler rake
         | 
| 29 29 | 
             
                    bundle install --jobs 4 --retry 3
         | 
| 30 30 | 
             
                    bundle exec rake test
         | 
    
        data/.github/workflows/windows.yml
    CHANGED
    
| @@ -9,9 +9,10 @@ jobs: | |
| 9 9 | 
             
                  fail-fast: false
         | 
| 10 10 | 
             
                  matrix:
         | 
| 11 11 | 
             
                    ruby:
         | 
| 12 | 
            -
                      - 2.7
         | 
| 13 12 | 
             
                      - 3.0
         | 
| 14 13 | 
             
                      - 3.1
         | 
| 14 | 
            +
                      - 3.2
         | 
| 15 | 
            +
                      - 3.3
         | 
| 15 16 | 
             
                    os:
         | 
| 16 17 | 
             
                      - windows-latest
         | 
| 17 18 | 
             
                name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
         | 
| @@ -25,6 +26,5 @@ jobs: | |
| 25 26 | 
             
                    CI: true
         | 
| 26 27 | 
             
                  run: |
         | 
| 27 28 | 
             
                    ruby -v
         | 
| 28 | 
            -
                    gem install bundler rake
         | 
| 29 29 | 
             
                    bundle install --jobs 4 --retry 3
         | 
| 30 30 | 
             
                    bundle exec rake test
         | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/CHANGELOG.md
    ADDED
    
    | @@ -0,0 +1,8 @@ | |
| 1 | 
            +
            ## [v3.1.0](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/compare/v3.0.1...v3.1.0) (2022-12-16)
         | 
| 2 | 
            +
             | 
| 3 | 
            +
             | 
| 4 | 
            +
            ### Features
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            * Support GEOGRAPHY type field ([#201](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/201)) ([734faa9](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/734faa9adb7cec1ed579fc6a0bd9ce72d48b82d0))
         | 
| 7 | 
            +
            * Support JSON type field ([#204](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/issues/204)) ([ec62bfa](https://github.com/fluent-plugins-nursery/fluent-plugin-bigquery/commit/ec62bfa2f858feb440e8bb8e8f8d6b8689f709bb))
         | 
| 8 | 
            +
             | 
    
        data/README.md
    CHANGED
    
    | @@ -30,7 +30,7 @@ If you use official alpine based fluentd docker image (https://github.com/fluent | |
| 30 30 | 
             
            You need to install `bigdecimal` gem on your own dockerfile.
         | 
| 31 31 | 
             
            Because alpine based image has only minimal ruby environment in order to reduce image size.
         | 
| 32 32 | 
             
            And in most cases, a dependency on an embedded gem is not declared in the gemspec.
         | 
| 33 | 
            -
            Because  | 
| 33 | 
            +
            Because embedded gem dependency sometimes restricts ruby environment.
         | 
| 34 34 |  | 
| 35 35 | 
             
            ## Configuration
         | 
| 36 36 |  | 
| @@ -40,7 +40,7 @@ Because embbeded gem dependency sometimes restricts ruby environment. | |
| 40 40 |  | 
| 41 41 | 
             
            | name                                          | type          | required?                                    | placeholder? | default                    | description                                                                                            |
         | 
| 42 42 | 
             
            | :-------------------------------------------- | :------------ | :-----------                                 | :----------  | :------------------------- | :-----------------------                                                                               |
         | 
| 43 | 
            -
            | auth_method                                   | enum          | yes                                          | no           | private_key                | `private_key` or `json_key` or `compute_engine` or `application_default` | 
| 43 | 
            +
            | auth_method                                   | enum          | yes                                          | no           | private_key                | `private_key` or `json_key` or `compute_engine` or `application_default` (GKE Workload Identity)       |
         | 
| 44 44 | 
             
            | email                                         | string        | yes (private_key)                            | no           | nil                        | GCP Service Account Email                                                                              |
         | 
| 45 45 | 
             
            | private_key_path                              | string        | yes (private_key)                            | no           | nil                        | GCP Private Key file path                                                                              |
         | 
| 46 46 | 
             
            | private_key_passphrase                        | string        | yes (private_key)                            | no           | nil                        | GCP Private Key Passphrase                                                                             |
         | 
| @@ -59,7 +59,7 @@ Because embbeded gem dependency sometimes restricts ruby environment. | |
| 59 59 | 
             
            | schema_cache_expire                           | integer       | no                                           | no           | 600                        | Value is second. If current time is after expiration interval, re-fetch table schema definition.       |
         | 
| 60 60 | 
             
            | request_timeout_sec                           | integer       | no                                           | no           | nil                        | Bigquery API response timeout                                                                          |
         | 
| 61 61 | 
             
            | request_open_timeout_sec                      | integer       | no                                           | no           | 60                         | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value.       |
         | 
| 62 | 
            -
            | time_partitioning_type                        | enum          | no (either day) | 
| 62 | 
            +
            | time_partitioning_type                        | enum          | no (either day or hour)                      | no           | nil                        | Type of bigquery time partitioning feature.                                                            |
         | 
| 63 63 | 
             
            | time_partitioning_field                       | string        | no                                           | no           | nil                        | Field used to determine how to create a time-based partition.                                          |
         | 
| 64 64 | 
             
            | time_partitioning_expiration                  | time          | no                                           | no           | nil                        | Expiration milliseconds for bigquery time partitioning.                                                |
         | 
| 65 65 | 
             
            | clustering_fields                             | array(string) | no                                           | no           | nil                        | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
         | 
| @@ -194,15 +194,15 @@ For high rate inserts over streaming inserts, you should specify flush intervals | |
| 194 194 | 
             
            ```apache
         | 
| 195 195 | 
             
            <match dummy>
         | 
| 196 196 | 
             
              @type bigquery_insert
         | 
| 197 | 
            -
             | 
| 197 | 
            +
             | 
| 198 198 | 
             
              <buffer>
         | 
| 199 199 | 
             
                flush_interval 0.1  # flush as frequent as possible
         | 
| 200 | 
            -
             | 
| 200 | 
            +
             | 
| 201 201 | 
             
                total_limit_size 10g
         | 
| 202 | 
            -
             | 
| 202 | 
            +
             | 
| 203 203 | 
             
                flush_thread_count 16
         | 
| 204 204 | 
             
              </buffer>
         | 
| 205 | 
            -
             | 
| 205 | 
            +
             | 
| 206 206 | 
             
              auth_method private_key   # default
         | 
| 207 207 | 
             
              email xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxx@developer.gserviceaccount.com
         | 
| 208 208 | 
             
              private_key_path /home/username/.keys/00000000000000000000000000000000-privatekey.p12
         | 
| @@ -255,7 +255,7 @@ Important options for high rate events are: | |
| 255 255 | 
             
                * threads for insert api calls in parallel
         | 
| 256 256 | 
             
                * specify this option for 100 or more records per second
         | 
| 257 257 | 
             
                * 10 or more threads seems good for inserts over internet
         | 
| 258 | 
            -
                *  | 
| 258 | 
            +
                * fewer threads may be good for Google Compute Engine instances (with low latency for BigQuery)
         | 
| 259 259 | 
             
              * `buffer/flush_interval`
         | 
| 260 260 | 
             
                * interval between data flushes (default 0.25)
         | 
| 261 261 | 
             
                * you can set subsecond values such as `0.15` on Fluentd v0.10.42 or later
         | 
| @@ -294,7 +294,7 @@ There are four methods supported to fetch access token for the service account. | |
| 294 294 | 
             
            1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
         | 
| 295 295 | 
             
            2. JSON key of GCP(Google Cloud Platform)'s service account
         | 
| 296 296 | 
             
            3. Predefined access token (Compute Engine only)
         | 
| 297 | 
            -
            4. Google application default credentials | 
| 297 | 
            +
            4. [Google application default credentials](https://cloud.google.com/docs/authentication/application-default-credentials) / GKE Workload Identity
         | 
| 298 298 |  | 
| 299 299 | 
             
            #### Public-Private key pair of GCP's service account
         | 
| 300 300 |  | 
| @@ -339,7 +339,7 @@ You need to only include `private_key` and `client_email` key from JSON key file | |
| 339 339 |  | 
| 340 340 | 
             
            #### Predefined access token (Compute Engine only)
         | 
| 341 341 |  | 
| 342 | 
            -
            When you run fluentd on  | 
| 342 | 
            +
            When you run fluentd on Google Compute Engine instance,
         | 
| 343 343 | 
             
            you don't need to explicitly create a service account for fluentd.
         | 
| 344 344 | 
             
            In this authentication method, you need to add the API scope "https://www.googleapis.com/auth/bigquery" to the scope list of your
         | 
| 345 345 | 
             
            Compute Engine instance, then you can configure fluentd like this.
         | 
| @@ -360,14 +360,16 @@ Compute Engine instance, then you can configure fluentd like this. | |
| 360 360 |  | 
| 361 361 | 
             
            #### Application default credentials
         | 
| 362 362 |  | 
| 363 | 
            -
            The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at  | 
| 363 | 
            +
            The Application Default Credentials provide a simple way to get authorization credentials for use in calling Google APIs, which are described in detail at https://cloud.google.com/docs/authentication/application-default-credentials.
         | 
| 364 | 
            +
             | 
| 365 | 
            +
            **This is the method you should choose if you want to use Workload Identity on GKE**.
         | 
| 364 366 |  | 
| 365 367 | 
             
            In this authentication method, the credentials returned are determined by the environment the code is running in. Conditions are checked in the following order:
         | 
| 366 368 |  | 
| 367 369 | 
             
            1. The environment variable `GOOGLE_APPLICATION_CREDENTIALS` is checked. If this variable is specified it should point to a JSON key file that defines the credentials.
         | 
| 368 | 
            -
            2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If  | 
| 369 | 
            -
            3. Well known path is checked. If file  | 
| 370 | 
            -
            4. System default path is checked. If file  | 
| 370 | 
            +
            2. The environment variable `GOOGLE_PRIVATE_KEY` and `GOOGLE_CLIENT_EMAIL` are checked. If these variables are specified `GOOGLE_PRIVATE_KEY` should point to `private_key`, `GOOGLE_CLIENT_EMAIL` should point to `client_email` in a JSON key.
         | 
| 371 | 
            +
            3. Well known path is checked. If the file exists, it is used as a JSON key file. This path is `$HOME/.config/gcloud/application_default_credentials.json`.
         | 
| 372 | 
            +
            4. System default path is checked. If the file exists, it is used as a JSON key file. This path is `/etc/google/auth/application_default_credentials.json`.
         | 
| 371 373 | 
             
            5. If you are running in Google Compute Engine production, the built-in service account associated with the virtual machine instance will be used.
         | 
| 372 374 | 
             
            6. If none of these conditions is true, an error will occur.
         | 
| 373 375 |  | 
| @@ -543,11 +545,11 @@ The second method is to specify a path to a BigQuery schema file instead of list | |
| 543 545 | 
             
              @type bigquery_insert
         | 
| 544 546 |  | 
| 545 547 | 
             
              ...
         | 
| 546 | 
            -
             | 
| 548 | 
            +
             | 
| 547 549 | 
             
              schema_path /path/to/httpd.schema
         | 
| 548 550 | 
             
            </match>
         | 
| 549 551 | 
             
            ```
         | 
| 550 | 
            -
            where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full  | 
| 552 | 
            +
            where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full flexibility.
         | 
| 551 553 |  | 
| 552 554 | 
             
            The third method is to set `fetch_schema` to `true` to fetch the schema using the BigQuery API.  In this case, your fluent.conf looks like:
         | 
| 553 555 |  | 
| @@ -556,7 +558,7 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi | |
| 556 558 | 
             
              @type bigquery_insert
         | 
| 557 559 |  | 
| 558 560 | 
             
              ...
         | 
| 559 | 
            -
             | 
| 561 | 
            +
             | 
| 560 562 | 
             
              fetch_schema true
         | 
| 561 563 | 
             
              # fetch_schema_table other_table # if you want to fetch schema from other table
         | 
| 562 564 | 
             
            </match>
         | 
| @@ -594,5 +596,5 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p | |
| 594 596 | 
             
            ## Authors
         | 
| 595 597 |  | 
| 596 598 | 
             
            * @tagomoris: First author, original version
         | 
| 597 | 
            -
            * KAIZEN platform Inc.:  | 
| 599 | 
            +
            * KAIZEN platform Inc.: Maintainer, Since 2014.08.19
         | 
| 598 600 | 
             
            * @joker1007
         | 
    
        data/integration/fluent.conf
    CHANGED
    
    | @@ -4,6 +4,14 @@ | |
| 4 4 | 
             
              bind 0.0.0.0
         | 
| 5 5 | 
             
            </source>
         | 
| 6 6 |  | 
| 7 | 
            +
            <source>
         | 
| 8 | 
            +
              @type dummy
         | 
| 9 | 
            +
              dummy {"json_field": {"foo": "val1", "bar": "val2", "hoge": 1}, "geography_field": {"type": "LineString", "coordinates": [[-118.4085, 33.9416], [-73.7781, 40.6413]]}, "timestamp_field": "2022-12-15T22:40:21+09:00", "date": "2022-12-15", "record_field": {"inner_field": "hoge", "inner_json": {"key1": "val1", "key2": "val2"}}, "repeated_string_field": ["a", "b", "c"]}
         | 
| 10 | 
            +
              auto_increment_key id
         | 
| 11 | 
            +
             | 
| 12 | 
            +
              tag insert_data
         | 
| 13 | 
            +
            </source>
         | 
| 14 | 
            +
             | 
| 7 15 | 
             
            <match insert_data>
         | 
| 8 16 | 
             
              @id bigquery-insert-integration
         | 
| 9 17 | 
             
              @type bigquery_insert
         | 
| @@ -21,7 +29,7 @@ | |
| 21 29 | 
             
                total_limit_size 1g
         | 
| 22 30 | 
             
                path ./log/bigquery-insert-integration
         | 
| 23 31 |  | 
| 24 | 
            -
                flush_interval  | 
| 32 | 
            +
                flush_interval 15
         | 
| 25 33 | 
             
                flush_thread_count 4
         | 
| 26 34 | 
             
                flush_at_shutdown true
         | 
| 27 35 |  | 
| @@ -37,6 +45,7 @@ | |
| 37 45 | 
             
              dataset "#{ENV["DATASET_NAME"]}"
         | 
| 38 46 | 
             
              table "#{ENV["TABLE_NAME"]}"
         | 
| 39 47 | 
             
              auto_create_table false
         | 
| 48 | 
            +
              # schema_path integration/schema.json
         | 
| 40 49 | 
             
              fetch_schema true
         | 
| 41 50 | 
             
              fetch_schema_table "#{ENV["TABLE_NAME"]}"
         | 
| 42 51 |  | 
| @@ -78,6 +87,7 @@ | |
| 78 87 | 
             
              dataset "#{ENV["DATASET_NAME"]}"
         | 
| 79 88 | 
             
              table "#{ENV["TABLE_NAME"]}"
         | 
| 80 89 | 
             
              auto_create_table false
         | 
| 90 | 
            +
              # schema_path integration/schema.json
         | 
| 81 91 | 
             
              fetch_schema true
         | 
| 82 92 | 
             
              fetch_schema_table "#{ENV["TABLE_NAME"]}"
         | 
| 83 93 |  | 
    
        data/integration/schema.json
    CHANGED
    
    | @@ -9,6 +9,16 @@ | |
| 9 9 | 
             
                "type": "STRING",
         | 
| 10 10 | 
             
                "mode": "NULLABLE"
         | 
| 11 11 | 
             
              },
         | 
| 12 | 
            +
              {
         | 
| 13 | 
            +
                "name": "json_field",
         | 
| 14 | 
            +
                "type": "JSON",
         | 
| 15 | 
            +
                "mode": "NULLABLE"
         | 
| 16 | 
            +
              },
         | 
| 17 | 
            +
              {
         | 
| 18 | 
            +
                "name": "geography_field",
         | 
| 19 | 
            +
                "type": "GEOGRAPHY",
         | 
| 20 | 
            +
                "mode": "NULLABLE"
         | 
| 21 | 
            +
              },
         | 
| 12 22 | 
             
              {
         | 
| 13 23 | 
             
                "name": "timestamp_field",
         | 
| 14 24 | 
             
                "type": "TIMESTAMP",
         | 
| @@ -18,5 +28,27 @@ | |
| 18 28 | 
             
                "name": "date",
         | 
| 19 29 | 
             
                "type": "DATE",
         | 
| 20 30 | 
             
                "mode": "REQUIRED"
         | 
| 31 | 
            +
              },
         | 
| 32 | 
            +
              {
         | 
| 33 | 
            +
                "name": "record_field",
         | 
| 34 | 
            +
                "type": "RECORD",
         | 
| 35 | 
            +
                "mode": "NULLABLE",
         | 
| 36 | 
            +
                "fields": [
         | 
| 37 | 
            +
                  {
         | 
| 38 | 
            +
                    "name": "inner_field",
         | 
| 39 | 
            +
                    "type": "STRING",
         | 
| 40 | 
            +
                    "mode": "REQUIRED"
         | 
| 41 | 
            +
                  },
         | 
| 42 | 
            +
                  {
         | 
| 43 | 
            +
                    "name": "inner_json",
         | 
| 44 | 
            +
                    "type": "JSON",
         | 
| 45 | 
            +
                    "mode": "REQUIRED"
         | 
| 46 | 
            +
                  }
         | 
| 47 | 
            +
                ]
         | 
| 48 | 
            +
              },
         | 
| 49 | 
            +
              {
         | 
| 50 | 
            +
                "name": "repeated_string_field",
         | 
| 51 | 
            +
                "type": "STRING",
         | 
| 52 | 
            +
                "mode": "REPEATED"
         | 
| 21 53 | 
             
              }
         | 
| 22 54 | 
             
            ]
         | 
    
        data/lib/fluent/plugin/bigquery/schema.rb
    CHANGED
    
| @@ -23,23 +23,23 @@ module Fluent | |
| 23 23 |  | 
| 24 24 | 
             
                  attr_reader :name, :mode
         | 
| 25 25 |  | 
| 26 | 
            -
                  def format(value)
         | 
| 26 | 
            +
                  def format(value, is_load: false)
         | 
| 27 27 | 
             
                    case @mode
         | 
| 28 28 | 
             
                    when :nullable
         | 
| 29 | 
            -
                      format_one(value) unless value.nil?
         | 
| 29 | 
            +
                      format_one(value, is_load: is_load) unless value.nil?
         | 
| 30 30 | 
             
                    when :required
         | 
| 31 31 | 
             
                      if value.nil?
         | 
| 32 32 | 
             
                        log.warn "Required field #{name} cannot be null"
         | 
| 33 33 | 
             
                        nil
         | 
| 34 34 | 
             
                      else
         | 
| 35 | 
            -
                        format_one(value)
         | 
| 35 | 
            +
                        format_one(value, is_load: is_load)
         | 
| 36 36 | 
             
                      end
         | 
| 37 37 | 
             
                    when :repeated
         | 
| 38 | 
            -
                      value.nil? ? [] : value.each_with_object([]) { |v, arr| arr << format_one(v) if v }
         | 
| 38 | 
            +
                      value.nil? ? [] : value.each_with_object([]) { |v, arr| arr << format_one(v, is_load: true) if v }
         | 
| 39 39 | 
             
                    end
         | 
| 40 40 | 
             
                  end
         | 
| 41 41 |  | 
| 42 | 
            -
                  def format_one(value)
         | 
| 42 | 
            +
                  def format_one(value, is_load: false)
         | 
| 43 43 | 
             
                    raise NotImplementedError, "Must implement in a subclass"
         | 
| 44 44 | 
             
                  end
         | 
| 45 45 |  | 
| @@ -57,7 +57,7 @@ module Fluent | |
| 57 57 | 
             
                    :string
         | 
| 58 58 | 
             
                  end
         | 
| 59 59 |  | 
| 60 | 
            -
                  def format_one(value)
         | 
| 60 | 
            +
                  def format_one(value, is_load: false)
         | 
| 61 61 | 
             
                    if value.is_a?(Hash) || value.is_a?(Array)
         | 
| 62 62 | 
             
                      MultiJson.dump(value)
         | 
| 63 63 | 
             
                    else
         | 
| @@ -66,12 +66,32 @@ module Fluent | |
| 66 66 | 
             
                  end
         | 
| 67 67 | 
             
                end
         | 
| 68 68 |  | 
| 69 | 
            +
                class JsonFieldSchema < FieldSchema
         | 
| 70 | 
            +
                  def type
         | 
| 71 | 
            +
                    :json
         | 
| 72 | 
            +
                  end
         | 
| 73 | 
            +
             | 
| 74 | 
            +
                  def format_one(value, is_load: false)
         | 
| 75 | 
            +
                    if is_load
         | 
| 76 | 
            +
                      value
         | 
| 77 | 
            +
                    else
         | 
| 78 | 
            +
                      MultiJson.dump(value)
         | 
| 79 | 
            +
                    end
         | 
| 80 | 
            +
                  end
         | 
| 81 | 
            +
                end
         | 
| 82 | 
            +
             | 
| 83 | 
            +
                class GeographyFieldSchema < StringFieldSchema
         | 
| 84 | 
            +
                  def type
         | 
| 85 | 
            +
                    :geography
         | 
| 86 | 
            +
                  end
         | 
| 87 | 
            +
                end
         | 
| 88 | 
            +
             | 
| 69 89 | 
             
                class IntegerFieldSchema < FieldSchema
         | 
| 70 90 | 
             
                  def type
         | 
| 71 91 | 
             
                    :integer
         | 
| 72 92 | 
             
                  end
         | 
| 73 93 |  | 
| 74 | 
            -
                  def format_one(value)
         | 
| 94 | 
            +
                  def format_one(value, is_load: false)
         | 
| 75 95 | 
             
                    value.to_i
         | 
| 76 96 | 
             
                  end
         | 
| 77 97 | 
             
                end
         | 
| @@ -81,7 +101,7 @@ module Fluent | |
| 81 101 | 
             
                    :float
         | 
| 82 102 | 
             
                  end
         | 
| 83 103 |  | 
| 84 | 
            -
                  def format_one(value)
         | 
| 104 | 
            +
                  def format_one(value, is_load: false)
         | 
| 85 105 | 
             
                    value.to_f
         | 
| 86 106 | 
             
                  end
         | 
| 87 107 | 
             
                end
         | 
| @@ -91,17 +111,27 @@ module Fluent | |
| 91 111 | 
             
                    :numeric
         | 
| 92 112 | 
             
                  end
         | 
| 93 113 |  | 
| 94 | 
            -
                  def format_one(value)
         | 
| 114 | 
            +
                  def format_one(value, is_load: false)
         | 
| 95 115 | 
             
                    value.to_s
         | 
| 96 116 | 
             
                  end
         | 
| 97 117 | 
             
                end
         | 
| 98 118 |  | 
| 119 | 
            +
                class BigNumericFieldSchema < FieldSchema
         | 
| 120 | 
            +
                  def type
         | 
| 121 | 
            +
                    :bignumeric
         | 
| 122 | 
            +
                  end
         | 
| 123 | 
            +
             | 
| 124 | 
            +
                  def format_one(value, is_load: false)
         | 
| 125 | 
            +
                    value.to_s
         | 
| 126 | 
            +
                  end
         | 
| 127 | 
            +
                end    
         | 
| 128 | 
            +
             | 
| 99 129 | 
             
                class BooleanFieldSchema < FieldSchema
         | 
| 100 130 | 
             
                  def type
         | 
| 101 131 | 
             
                    :boolean
         | 
| 102 132 | 
             
                  end
         | 
| 103 133 |  | 
| 104 | 
            -
                  def format_one(value)
         | 
| 134 | 
            +
                  def format_one(value, is_load: false)
         | 
| 105 135 | 
             
                    !!value
         | 
| 106 136 | 
             
                  end
         | 
| 107 137 | 
             
                end
         | 
| @@ -114,7 +144,7 @@ module Fluent | |
| 114 144 | 
             
                    :timestamp
         | 
| 115 145 | 
             
                  end
         | 
| 116 146 |  | 
| 117 | 
            -
                  def format_one(value)
         | 
| 147 | 
            +
                  def format_one(value, is_load: false)
         | 
| 118 148 | 
             
                    case value
         | 
| 119 149 | 
             
                    when Time
         | 
| 120 150 | 
             
                      value.strftime("%Y-%m-%d %H:%M:%S.%6L %:z")
         | 
| @@ -137,7 +167,7 @@ module Fluent | |
| 137 167 | 
             
                    :date
         | 
| 138 168 | 
             
                  end
         | 
| 139 169 |  | 
| 140 | 
            -
                  def format_one(value)
         | 
| 170 | 
            +
                  def format_one(value, is_load: false)
         | 
| 141 171 | 
             
                    if value.respond_to?(:strftime)
         | 
| 142 172 | 
             
                      value.strftime("%Y-%m-%d")
         | 
| 143 173 | 
             
                    else
         | 
| @@ -151,7 +181,7 @@ module Fluent | |
| 151 181 | 
             
                    :datetime
         | 
| 152 182 | 
             
                  end
         | 
| 153 183 |  | 
| 154 | 
            -
                  def format_one(value)
         | 
| 184 | 
            +
                  def format_one(value, is_load: false)
         | 
| 155 185 | 
             
                    if value.respond_to?(:strftime)
         | 
| 156 186 | 
             
                      value.strftime("%Y-%m-%dT%H:%M:%S.%6L")
         | 
| 157 187 | 
             
                    else
         | 
| @@ -165,7 +195,7 @@ module Fluent | |
| 165 195 | 
             
                    :time
         | 
| 166 196 | 
             
                  end
         | 
| 167 197 |  | 
| 168 | 
            -
                  def format_one(value)
         | 
| 198 | 
            +
                  def format_one(value, is_load: false)
         | 
| 169 199 | 
             
                    if value.respond_to?(:strftime)
         | 
| 170 200 | 
             
                      value.strftime("%H:%M:%S.%6L")
         | 
| 171 201 | 
             
                    else
         | 
| @@ -180,11 +210,14 @@ module Fluent | |
| 180 210 | 
             
                    integer: IntegerFieldSchema,
         | 
| 181 211 | 
             
                    float: FloatFieldSchema,
         | 
| 182 212 | 
             
                    numeric: NumericFieldSchema,
         | 
| 213 | 
            +
                    bignumeric: BigNumericFieldSchema,
         | 
| 183 214 | 
             
                    boolean: BooleanFieldSchema,
         | 
| 184 215 | 
             
                    timestamp: TimestampFieldSchema,
         | 
| 185 216 | 
             
                    date: DateFieldSchema,
         | 
| 186 217 | 
             
                    datetime: DateTimeFieldSchema,
         | 
| 187 218 | 
             
                    time: TimeFieldSchema,
         | 
| 219 | 
            +
                    json: JsonFieldSchema,
         | 
| 220 | 
            +
                    geography: GeographyFieldSchema,
         | 
| 188 221 | 
             
                    record: RecordSchema
         | 
| 189 222 | 
             
                  }.freeze
         | 
| 190 223 |  | 
| @@ -256,12 +289,12 @@ module Fluent | |
| 256 289 | 
             
                    end
         | 
| 257 290 | 
             
                  end
         | 
| 258 291 |  | 
| 259 | 
            -
                  def format_one(record)
         | 
| 292 | 
            +
                  def format_one(record, is_load: false)
         | 
| 260 293 | 
             
                    out = {}
         | 
| 261 294 | 
             
                    record.each do |key, value|
         | 
| 262 295 | 
             
                      next if value.nil?
         | 
| 263 296 | 
             
                      schema = @fields[key]
         | 
| 264 | 
            -
                      out[key] = schema ? schema.format(value) : value
         | 
| 297 | 
            +
                      out[key] = schema ? schema.format(value, is_load: is_load) : value
         | 
| 265 298 | 
             
                    end
         | 
| 266 299 | 
             
                    out
         | 
| 267 300 | 
             
                  end
         | 
| @@ -69,7 +69,7 @@ module Fluent | |
| 69 69 | 
             
                  config_param :request_open_timeout_sec, :time, default: 60
         | 
| 70 70 |  | 
| 71 71 | 
             
                  ## Partitioning
         | 
| 72 | 
            -
                  config_param :time_partitioning_type, :enum, list: [:day], default: nil
         | 
| 72 | 
            +
                  config_param :time_partitioning_type, :enum, list: [:day, :hour], default: nil
         | 
| 73 73 | 
             
                  config_param :time_partitioning_field, :string, default: nil
         | 
| 74 74 | 
             
                  config_param :time_partitioning_expiration, :time, default: nil
         | 
| 75 75 |  | 
| @@ -172,7 +172,7 @@ module Fluent | |
| 172 172 | 
             
                      end
         | 
| 173 173 |  | 
| 174 174 | 
             
                    begin
         | 
| 175 | 
            -
                      row = schema.format(record)
         | 
| 175 | 
            +
                      row = schema.format(record, is_load: !!@is_load)
         | 
| 176 176 | 
             
                      return if row.empty?
         | 
| 177 177 | 
             
                      @formatter.format(tag, time, row)
         | 
| 178 178 | 
             
                    rescue
         | 
| @@ -36,6 +36,7 @@ module Fluent | |
| 36 36 |  | 
| 37 37 | 
             
                  def configure(conf)
         | 
| 38 38 | 
             
                    super
         | 
| 39 | 
            +
                    @is_load = true
         | 
| 39 40 |  | 
| 40 41 | 
             
                    placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}"
         | 
| 41 42 | 
             
                    placeholder_validate!(:bigquery_load, placeholder_params)
         | 
| @@ -29,9 +29,14 @@ class RecordSchemaTest < Test::Unit::TestCase | |
| 29 29 | 
             
                    "mode" => "REPEATED"
         | 
| 30 30 | 
             
                  },
         | 
| 31 31 | 
             
                  {
         | 
| 32 | 
            -
                    "name" => " | 
| 32 | 
            +
                    "name" => "utilization",
         | 
| 33 33 | 
             
                    "type" => "NUMERIC",
         | 
| 34 34 | 
             
                    "mode" => "NULLABLE"
         | 
| 35 | 
            +
                  },
         | 
| 36 | 
            +
                  {
         | 
| 37 | 
            +
                    "name" => "bigutilization",
         | 
| 38 | 
            +
                    "type" => "BIGNUMERIC",
         | 
| 39 | 
            +
                    "mode" => "NULLABLE"
         | 
| 35 40 | 
             
                  }
         | 
| 36 41 | 
             
                ]
         | 
| 37 42 | 
             
              end
         | 
| @@ -64,15 +69,20 @@ class RecordSchemaTest < Test::Unit::TestCase | |
| 64 69 | 
             
                    "mode" => "REPEATED"
         | 
| 65 70 | 
             
                  },
         | 
| 66 71 | 
             
                  {
         | 
| 67 | 
            -
                    "name" => " | 
| 72 | 
            +
                    "name" => "utilization",
         | 
| 68 73 | 
             
                    "type" => "NUMERIC",
         | 
| 69 74 | 
             
                    "mode" => "NULLABLE"
         | 
| 70 75 | 
             
                  },
         | 
| 76 | 
            +
                  {
         | 
| 77 | 
            +
                    "name" => "bigutilization",
         | 
| 78 | 
            +
                    "type" => "BIGNUMERIC",
         | 
| 79 | 
            +
                    "mode" => "NULLABLE"
         | 
| 80 | 
            +
                  },
         | 
| 71 81 | 
             
                  {
         | 
| 72 82 | 
             
                    "name" => "new_column",
         | 
| 73 83 | 
             
                    "type" => "STRING",
         | 
| 74 84 | 
             
                    "mode" => "REQUIRED"
         | 
| 75 | 
            -
                  }
         | 
| 85 | 
            +
                  },
         | 
| 76 86 | 
             
                ]
         | 
| 77 87 | 
             
              end
         | 
| 78 88 |  | 
| @@ -104,9 +114,14 @@ class RecordSchemaTest < Test::Unit::TestCase | |
| 104 114 | 
             
                    "mode" => "REPEATED"
         | 
| 105 115 | 
             
                  },
         | 
| 106 116 | 
             
                  {
         | 
| 107 | 
            -
                    "name" => " | 
| 117 | 
            +
                    "name" => "utilization",
         | 
| 108 118 | 
             
                    "type" => "NUMERIC",
         | 
| 109 119 | 
             
                    "mode" => "NULLABLE"
         | 
| 120 | 
            +
                  },
         | 
| 121 | 
            +
                  {
         | 
| 122 | 
            +
                    "name" => "bigutilization",
         | 
| 123 | 
            +
                    "type" => "BIGNUMERIC",
         | 
| 124 | 
            +
                    "mode" => "NULLABLE"
         | 
| 110 125 | 
             
                  }
         | 
| 111 126 | 
             
                ]
         | 
| 112 127 | 
             
              end
         | 
| @@ -157,12 +172,12 @@ class RecordSchemaTest < Test::Unit::TestCase | |
| 157 172 | 
             
                time = Time.local(2016, 2, 7, 19, 0, 0).utc
         | 
| 158 173 |  | 
| 159 174 | 
             
                formatted = fields.format_one({
         | 
| 160 | 
            -
                  "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], " | 
| 175 | 
            +
                  "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilization" => "0.837", "bigutilization" => "0.837"
         | 
| 161 176 | 
             
                })
         | 
| 162 177 | 
             
                assert_equal(
         | 
| 163 178 | 
             
                  formatted,
         | 
| 164 179 | 
             
                  {
         | 
| 165 | 
            -
                    "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], " | 
| 180 | 
            +
                    "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilization" => "0.837", "bigutilization" => "0.837"
         | 
| 166 181 | 
             
                  }
         | 
| 167 182 | 
             
                )
         | 
| 168 183 | 
             
              end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: fluent-plugin-bigquery
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 3.0.1
         | 
| 4 | 
            +
              version: 3.2.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Naoya Ito
         | 
| @@ -9,7 +9,7 @@ authors: | |
| 9 9 | 
             
            autorequire:
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date:  | 
| 12 | 
            +
            date: 2024-10-26 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: rake
         | 
| @@ -142,6 +142,7 @@ files: | |
| 142 142 | 
             
            - ".github/workflows/linux.yml"
         | 
| 143 143 | 
             
            - ".github/workflows/windows.yml"
         | 
| 144 144 | 
             
            - ".gitignore"
         | 
| 145 | 
            +
            - CHANGELOG.md
         | 
| 145 146 | 
             
            - Gemfile
         | 
| 146 147 | 
             
            - LICENSE.txt
         | 
| 147 148 | 
             
            - README.md
         | 
| @@ -190,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 190 191 | 
             
                - !ruby/object:Gem::Version
         | 
| 191 192 | 
             
                  version: '0'
         | 
| 192 193 | 
             
            requirements: []
         | 
| 193 | 
            -
            rubygems_version: 3. | 
| 194 | 
            +
            rubygems_version: 3.5.11
         | 
| 194 195 | 
             
            signing_key:
         | 
| 195 196 | 
             
            specification_version: 4
         | 
| 196 197 | 
             
            summary: Fluentd plugin to store data on Google BigQuery
         |