fluent-plugin-bigquery 1.2.0 → 2.0.0.beta

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: ef9d7a814ddde267ffd9bf2e10837c162a01b12b
- data.tar.gz: 5ca29129b5dfb449abd7e1a22d6d22fcba47a862
+ metadata.gz: '07998acf05ddb3e647da13a4b5c734dc16f8cc77'
+ data.tar.gz: 1fce9fc906cbf72083a4f8132c0ac1d985a95d6d
  SHA512:
- metadata.gz: 658fb6ece7816aeb711960179fa53544fe639f97aa41bbfea1a0fe007edf1a54fe11aef4c63f0ea1d42f2b2582c9ed335c5919aaec2ec18f3d53fdfa3e9d9be1
- data.tar.gz: 5ac9138e736eaf7dcf5b9fe1ecd46b92c67979313d93862fd061640f5dc5fc8f4bef3364cb0f053ab3a5a9d7d34ffeb1fef58dbbb532f59f99befe899b9066c6
+ metadata.gz: 04cfd6d3080d9424e25bd75ae1a9600259fe94ed933adceab66c02eb11afdb49eeddc393c305f0927dd64f967d1e72835fde9566cd54b2e53805e85ffe7a1516
+ data.tar.gz: 8de74527cf12be2c6553e4a582cc25c47a1773cdc165800f212aae563f7ffa048679260515a51f55e244b641b968badeaa4349cf4369ad2363d22aff1c1cbe7d
data/.travis.yml CHANGED
@@ -1,20 +1,13 @@
  language: ruby
 
  rvm:
- - 2.1
  - 2.2
  - 2.3.3
+ - 2.4.3
+ - 2.5.0
 
  gemfile:
  - Gemfile
- - gemfiles/activesupport-4.gemfile
-
- matrix:
-   exclude:
-     - rvm: 2.0
-       gemfile: Gemfile
-     - rvm: 2.1
-       gemfile: Gemfile
 
  before_install:
  - gem update bundler
data/README.md CHANGED
@@ -1,13 +1,17 @@
  # fluent-plugin-bigquery
 
+ **This README is for v2.0.0.beta, which has not been released yet.**
+
  [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.
 
- - **Plugin type**: BufferedOutput
+ - **Plugin type**: Output
 
  * insert data over streaming inserts
+   * plugin type is `bigquery_insert`
    * for continuous real-time insertions
    * https://developers.google.com/bigquery/streaming-data-into-bigquery#usecases
  * load data
+   * plugin type is `bigquery_load`
    * for data loading as batch jobs, for big amount of data
    * https://developers.google.com/bigquery/loading-data-into-bigquery
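
For orientation while reading the rest of the diff: v2 replaces the single `bigquery` output (which chose its behaviour via `method insert` / `method load`) with the two plugin types listed above. A minimal hypothetical sketch of the v2 style, with placeholder project/dataset/table names:

```apache
<match dummy>
  @type bigquery_insert    # streaming inserts (was: @type bigquery + method insert)
  # @type bigquery_load    # batch load jobs   (was: @type bigquery + method load)

  project yourproject_id   # placeholder values
  dataset yourdataset_id
  table   tablename
</match>
```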
 
@@ -31,42 +35,47 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 
  ### Options
 
- | name | type | required? | placeholder? | default | description |
- | :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
- | method | string | no | no | insert | `insert` (Streaming Insert) or `load` (load job) |
- | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
- | email | string | yes (private_key) | no | nil | GCP Service Account Email |
- | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
- | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
- | json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
- | project | string | yes | yes | nil | |
- | dataset | string | yes | yes | nil | |
- | table | string | yes (either `tables`) | yes | nil | |
- | tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
- | template_suffix | string | no | yes | nil | can use `%{time_slice}` placeholder replaced by `time_slice_format` |
- | auto_create_table | bool | no | no | false | If true, creates table automatically |
- | skip_invalid_rows | bool | no | no | false | Only `insert` method. |
- | max_bad_records | integer | no | no | 0 | Only `load` method. If the number of bad records exceeds this value, an invalid error is returned in the job result. |
- | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
- | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
- | schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
- | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
- | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
- | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
- | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. |
- | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
- | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
- | request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
- | request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
- | time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
- | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
-
- ### Deprecated
-
- | name | type | required? | placeholder? | default | description |
- | :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
- | replace_record_key | bool | no | no | false | Use other filter plugin. |
- | replace_record_key_regexp{1-10} | string | no | no | nil | |
+ #### common
+
+ | name | type | required? | placeholder? | default | description |
+ | :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+ | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+ | email | string | yes (private_key) | no | nil | GCP Service Account Email |
+ | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
+ | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
+ | json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
+ | project | string | yes | yes | nil | |
+ | dataset | string | yes | yes | nil | |
+ | table | string | yes (either `tables`) | yes | nil | |
+ | tables | array(string) | yes (either `table`) | yes | nil | can set multiple table names split by `,` |
+ | auto_create_table | bool | no | no | false | If true, creates table automatically |
+ | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+ | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema definition, formatted as JSON. |
+ | schema_path | string | yes (either `fetch_schema`) | no | nil | Schema definition file path, formatted as JSON. |
+ | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from BigQuery table automatically. |
+ | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table. Ignored when fetch_schema is false. |
+ | schema_cache_expire | integer | no | no | 600 | Value is in seconds. If current time is after expiration interval, re-fetch table schema definition. |
+ | request_timeout_sec | integer | no | no | nil | BigQuery API response timeout |
+ | request_open_timeout_sec | integer | no | no | 60 | BigQuery API connection and request timeout. If you send large amounts of data to BigQuery, set a larger value. |
+ | time_partitioning_type | enum | no (either day) | no | nil | Type of BigQuery time partitioning feature (experimental feature on BigQuery). |
+ | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for BigQuery time partitioning (experimental feature on BigQuery). |
+
+ #### bigquery_insert
+
+ | name | type | required? | placeholder? | default | description |
+ | :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+ | template_suffix | string | no | yes | nil | can use `%{time_slice}` placeholder replaced by `time_slice_format` |
+ | skip_invalid_rows | bool | no | no | false | |
+ | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. See https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
+ | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
+ | allow_retry_insert_errors | bool | no | no | false | Retry inserting rows when insertErrors occur. Rows may be inserted in duplicate. |
+
+ #### bigquery_load
+
+ | name | type | required? | placeholder? | default | description |
+ | :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+ | source_format | enum | no | no | json | Specify source format: `json`, `csv`, or `avro`. If you change this parameter, you must also change the formatter plugin via the `<format>` config section. |
+ | max_bad_records | integer | no | no | 0 | If the number of bad records exceeds this value, an invalid error is returned in the job result. |
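
To see how the split tables combine in practice, here is a hedged sketch of a `bigquery_insert` section using a few of the common and insert-specific options listed above (the key path and table names are placeholders, and schema settings are omitted; see the schema examples further down):

```apache
<match dummy>
  @type bigquery_insert

  # common options
  auth_method json_key
  json_key /path/to/keyfile.json   # placeholder path
  project yourproject_id           # placeholder values
  dataset yourdataset_id
  table   tablename

  # bigquery_insert options
  insert_id_field id
  add_insert_timestamp inserted_at
</match>
```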
 
  ### Buffer section
 
@@ -77,9 +86,9 @@ Because embbeded gem dependency sometimes restricts ruby environment.
  | total_limit_size | integer | no | 1GB (insert) or 32GB (load) | |
  | chunk_records_limit | integer | no | 500 (insert) or nil (load) | |
  | flush_mode | enum | no | interval | default, lazy, interval, immediate |
- | flush_interval | float | no | 0.25 (insert) or nil (load) | |
- | flush_thread_interval | float | no | 0.05 (insert) or nil (load) | |
- | flush_thread_burst_interval | float | no | 0.05 (insert) or nil (load) | |
+ | flush_interval | float | no | 1.0 (insert) or 3600 (load) | |
+ | flush_thread_interval | float | no | 0.05 (insert) or 5 (load) | |
+ | flush_thread_burst_interval | float | no | 0.05 (insert) or 5 (load) | |
 
  And, other params (defined by base class) are available
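
As a rough illustration of the new buffer defaults above, a hypothetical `<buffer>` section for `bigquery_load` that overrides a couple of them might look like the following (the values are arbitrary examples, not recommendations):

```apache
<match bigquery>
  @type bigquery_load

  <buffer>
    @type file
    path bigquery.*.buffer   # placeholder path
    flush_interval 1800      # load default is 3600
    total_limit_size 32g     # load default
    flush_at_shutdown true
  </buffer>
</match>
```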
 
@@ -142,9 +151,7 @@ Configure insert specifications with target table schema, with your credentials.
 
  ```apache
  <match dummy>
- @type bigquery
-
- method insert # default
+ @type bigquery_insert
 
  auth_method private_key # default
  email xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxx@developer.gserviceaccount.com
@@ -181,14 +188,12 @@ For high rate inserts over streaming inserts, you should specify flush intervals
 
  ```apache
  <match dummy>
- @type bigquery
-
- method insert # default
+ @type bigquery_insert
 
  <buffer>
  flush_interval 0.1 # flush as frequent as possible
 
- buffer_queue_limit 10240 # 1MB * 10240 -> 10GB!
+ total_limit_size 10g
 
  flush_thread_count 16
  </buffer>
@@ -256,16 +261,12 @@ section in the Google BigQuery document.
  ### Load
  ```apache
  <match bigquery>
- @type bigquery
-
- method load
+ @type bigquery_load
 
  <buffer>
- @type file
- path bigquery.*.buffer
- flush_interval 1800
- flush_at_shutdown true
- timekey_use_utc
+ path bigquery.*.buffer
+ flush_at_shutdown true
+ timekey_use_utc
  </buffer>
 
  auth_method json_key
@@ -302,7 +303,7 @@ download its JSON key and deploy the key with fluentd.
 
  ```apache
  <match dummy>
- @type bigquery
+ @type bigquery_insert
 
  auth_method json_key
  json_key /home/username/.keys/00000000000000000000000000000000-jsonkey.json
@@ -319,7 +320,7 @@ You need to only include `private_key` and `client_email` key from JSON key file
 
  ```apache
  <match dummy>
- @type bigquery
+ @type bigquery_insert
 
  auth_method json_key
  json_key {"private_key": "-----BEGIN PRIVATE KEY-----\n...", "client_email": "xxx@developer.gserviceaccount.com"}
@@ -340,7 +341,7 @@ Compute Engine instance, then you can configure fluentd like this.
 
  ```apache
  <match dummy>
- @type bigquery
+ @type bigquery_insert
 
  auth_method compute_engine
 
@@ -382,7 +383,7 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 
  ```apache
  <match dummy>
- @type bigquery
+ @type bigquery_insert
 
  ...
 
@@ -430,7 +431,7 @@ Use placeholder.
 
  ```apache
  <match dummy>
- @type bigquery
+ @type bigquery_insert
 
  ...
  table accesslog$%Y%m%d
@@ -453,7 +454,7 @@ NOTE: `auto_create_table` option cannot be used with `fetch_schema`. You should
 
  ```apache
  <match dummy>
- @type bigquery
+ @type bigquery_insert
 
  ...
 
@@ -477,7 +478,7 @@ you can also specify nested fields by prefixing their belonging record fields.
 
  ```apache
  <match dummy>
- @type bigquery
+ @type bigquery_insert
 
  ...
 
@@ -528,7 +529,7 @@ The second method is to specify a path to a BigQuery schema file instead of list
 
  ```apache
  <match dummy>
- @type bigquery
+ @type bigquery_insert
 
  ...
 
@@ -541,7 +542,7 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
 
  ```apache
  <match dummy>
- @type bigquery
+ @type bigquery_insert
 
  ...
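
The two schema-related hunks above only swap the `@type` line; for context, here are hedged sketches of the `schema_path` and `fetch_schema` variants described in the surrounding text (file path and table name are placeholders):

```apache
# schema from a JSON schema file
<match dummy>
  @type bigquery_insert
  ...
  schema_path /path/to/httpd.schema   # placeholder path
</match>

# schema fetched from an existing BigQuery table
<match dummy>
  @type bigquery_insert
  ...
  fetch_schema true
  fetch_schema_table other_table      # optional; placeholder table name
</match>
```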
 
@@ -559,10 +560,12 @@ you are still recommended to specify JSON types for TIMESTAMP fields as "time" f
 
  BigQuery uses `insertId` property to detect duplicate insertion requests (see [data consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency) in Google BigQuery documents).
  You can set `insert_id_field` option to specify the field to use as `insertId` property.
+ `insert_id_field` accepts the fluentd record_accessor syntax, e.g. `$['key1'][0]['key2']`.
+ (details: https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor)
 
  ```apache
  <match dummy>
- @type bigquery
+ @type bigquery_insert
 
  ...
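
A brief hedged sketch of `insert_id_field` with the record_accessor syntax mentioned above (the field names are hypothetical):

```apache
<match dummy>
  @type bigquery_insert
  ...
  # top-level field
  insert_id_field uuid
  # nested field, using record_accessor syntax
  # insert_id_field $['data']['uuid']
</match>
```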
 
data/lib/fluent/plugin/bigquery/version.rb CHANGED
@@ -1,5 +1,5 @@
  module Fluent
  module BigQueryPlugin
- VERSION = "1.2.0".freeze
+ VERSION = "2.0.0.beta".freeze
  end
  end
data/lib/fluent/plugin/bigquery/writer.rb CHANGED
@@ -7,22 +7,15 @@ module Fluent
  @options = options
  @log = log
  @num_errors_per_chunk = {}
-
- @cached_client_expiration = Time.now + 1800
  end
 
  def client
- return @client if @client && @cached_client_expiration > Time.now
-
- client = Google::Apis::BigqueryV2::BigqueryService.new.tap do |cl|
+ @client ||= Google::Apis::BigqueryV2::BigqueryService.new.tap do |cl|
  cl.authorization = get_auth
  cl.client_options.open_timeout_sec = @options[:open_timeout_sec] if @options[:open_timeout_sec]
  cl.client_options.read_timeout_sec = @options[:timeout_sec] if @options[:timeout_sec]
  cl.client_options.send_timeout_sec = @options[:timeout_sec] if @options[:timeout_sec]
  end
-
- @cached_client_expiration = Time.now + 1800
- @client = client
  end
 
  def create_table(project, dataset, table_id, record_schema)
@@ -49,10 +42,7 @@ module Fluent
  end
  client.insert_table(project, dataset, definition, {})
  log.debug "create table", project_id: project, dataset: dataset, table: table_id
- @client = nil
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
- @client = nil
-
  message = e.message
  if e.status_code == 409 && /Already Exists:/ =~ message
  log.debug "already created table", project_id: project, dataset: dataset, table: table_id
@@ -81,7 +71,6 @@ module Fluent
 
  schema
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
- @client = nil
  message = e.message
  log.error "tables.get API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message
  nil
@@ -111,8 +100,6 @@ module Fluent
  end
  end
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
- @client = nil
-
  reason = e.respond_to?(:reason) ? e.reason : nil
  error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason }
  wrapped = Fluent::BigQuery::Error.wrap(e)
@@ -125,7 +112,17 @@ module Fluent
  raise wrapped
  end
 
- def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields)
+ JobReference = Struct.new(:chunk_id, :chunk_id_hex, :project_id, :dataset_id, :table_id, :job_id) do
+ def as_hash(*keys)
+ if keys.empty?
+ to_h
+ else
+ to_h.select { |k, _| keys.include?(k) }
+ end
+ end
+ end
+
+ def create_load_job(chunk_id, chunk_id_hex, project, dataset, table_id, upload_source, fields)
  configuration = {
  configuration: {
  load: {
@@ -145,7 +142,7 @@ module Fluent
  }
  }
 
- job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
+ job_id = create_job_id(chunk_id_hex, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
  configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
  configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
 
@@ -167,11 +164,8 @@ module Fluent
  content_type: "application/octet-stream",
  }
  )
- wait_load_job(chunk_id, project, dataset, res.job_reference.job_id, table_id)
- @num_errors_per_chunk.delete(chunk_id)
+ JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
- @client = nil
-
  reason = e.respond_to?(:reason) ? e.reason : nil
  log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
 
@@ -187,44 +181,56 @@ module Fluent
  end
 
  if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
- wait_load_job(chunk_id, project, dataset, job_id, table_id)
- @num_errors_per_chunk.delete(chunk_id)
- return
+ return JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, job_id)
  end
 
  raise Fluent::BigQuery::Error.wrap(e)
  end
 
- def wait_load_job(chunk_id, project, dataset, job_id, table_id)
- wait_interval = 10
- _response = client.get_job(project, job_id)
+ def fetch_load_job(job_reference)
+ project = job_reference.project_id
+ job_id = job_reference.job_id
+
+ res = client.get_job(project, job_id)
+ log.debug "load job fetched", id: job_id, state: res.status.state, **job_reference.as_hash(:project_id, :dataset_id, :table_id)
 
- until _response.status.state == "DONE"
- log.debug "wait for load job finish", state: _response.status.state, job_id: _response.job_reference.job_id
- sleep wait_interval
- _response = client.get_job(project, _response.job_reference.job_id)
+ if res.status.state == "DONE"
+ res
  end
+ rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+ e = Fluent::BigQuery::Error.wrap(e)
+ raise e unless e.retryable?
+ end
+
+ def commit_load_job(chunk_id_hex, response)
+ job_id = response.id
+ project = response.configuration.load.destination_table.project_id
+ dataset = response.configuration.load.destination_table.dataset_id
+ table_id = response.configuration.load.destination_table.table_id
 
- errors = _response.status.errors
+ errors = response.status.errors
  if errors
  errors.each do |e|
- log.error "job.insert API (rows)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: e.message, reason: e.reason
+ log.error "job.load API (rows)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: e.message, reason: e.reason
  end
  end
 
- error_result = _response.status.error_result
+ error_result = response.status.error_result
  if error_result
- log.error "job.insert API (result)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: error_result.message, reason: error_result.reason
+ log.error "job.load API (result)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: error_result.message, reason: error_result.reason
  if Fluent::BigQuery::Error.retryable_error_reason?(error_result.reason)
- @num_errors_per_chunk[chunk_id] = @num_errors_per_chunk[chunk_id].to_i + 1
+ @num_errors_per_chunk[chunk_id_hex] = @num_errors_per_chunk[chunk_id_hex].to_i + 1
  raise Fluent::BigQuery::RetryableError.new("failed to load into bigquery, retry")
  else
- @num_errors_per_chunk.delete(chunk_id)
+ @num_errors_per_chunk.delete(chunk_id_hex)
  raise Fluent::BigQuery::UnRetryableError.new("failed to load into bigquery, and cannot retry")
  end
  end
 
- log.debug "finish load job", state: _response.status.state
+ stats = response.statistics.load
+ duration = (response.statistics.end_time - response.statistics.creation_time) / 1000.0
+ log.debug "load job finished", id: job_id, state: response.status.state, input_file_bytes: stats.input_file_bytes, input_files: stats.input_files, output_bytes: stats.output_bytes, output_rows: stats.output_rows, bad_records: stats.bad_records, duration: duration.round(2), project_id: project, dataset: dataset, table: table_id
+ @num_errors_per_chunk.delete(chunk_id_hex)
  end
 
  private
@@ -291,8 +297,8 @@ module Fluent
  table_id.gsub(/\$\d+$/, "")
  end
 
- def create_job_id(chunk_id, dataset, table, schema)
- job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{@options[:max_bad_records]}#{@options[:ignore_unknown_values]}#{@num_errors_per_chunk[chunk_id]}"
+ def create_job_id(chunk_id_hex, dataset, table, schema)
+ job_id_key = "#{chunk_id_hex}#{dataset}#{table}#{schema.to_s}#{@options[:max_bad_records]}#{@options[:ignore_unknown_values]}#{@num_errors_per_chunk[chunk_id_hex]}"
  @log.debug "job_id_key: #{job_id_key}"
  "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
  end