fluent-plugin-bigquery 2.0.0 → 2.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: da6ea2c72e47fb9193731dd419914e5e0a7974a4a4b7013299547856efa94efe
- data.tar.gz: fc240aa17d7896d58f56e87a1c23dccad6850ecfa580c195c2cd1448ff7ba5f7
+ metadata.gz: b463f412345eb71d1b263bf56e0cd51ebe1c2dacffaa223293edb8d4e5776e73
+ data.tar.gz: f5f7766b2d0f4498239389ef38eb29ef9d20dbe9b118890e8d651b23330d33ca
  SHA512:
- metadata.gz: f115777a0f822c01872d9ee0e9bbc8f7da409dd4f69937a5518d82c302b42c8332775e199819830856f11ce21ec1661577fce29af25a09651e228567414384bf
- data.tar.gz: b2b210f0f04f7e5490dc9853cbd3ccbbf6e295a5d01eebe9052d54afdcbe4694b66cc2ea50693481a79450bdada838c9d6c73f1329f1ce8f14a0abed23dfbf5d
+ metadata.gz: 8d3851b83d9cbc7c802836dc5f5709d2f92009f980a3a6d3566730eea55fdaf697540c0370220441ed1d88687c27eb8677506e9897693469ef4fcb347d1e7825
+ data.tar.gz: 39223f99503c53a812549b4ff8de2a94c3b7db670e6dd9819840d86d561fe68c922f82c18f0201abe8625b2dbf79d0741413d21c56d9d0855b1889b68946a2f8
data/README.md CHANGED
@@ -1,7 +1,5 @@
  # fluent-plugin-bigquery
 
- **This README is for v2.0.0.beta**
-
  [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.
 
  - **Plugin type**: Output
@@ -39,29 +37,30 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 
  #### common
 
- | name | type | required? | placeholder? | default | description |
- | :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
- | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
- | email | string | yes (private_key) | no | nil | GCP Service Account Email |
- | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
- | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
- | json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
- | project | string | yes | yes | nil | |
- | dataset | string | yes | yes | nil | |
- | table | string | yes (either `tables`) | yes | nil | |
- | tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
- | auto_create_table | bool | no | no | false | If true, creates table automatically |
- | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
- | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
- | schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
- | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
- | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
- | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
- | request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
- | request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
- | time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
- | time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition(experimental feature on BigQuery). |
- | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+ | name | type | required? | placeholder? | default | description |
+ | :-------------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+ | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+ | email | string | yes (private_key) | no | nil | GCP Service Account Email |
+ | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
+ | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
+ | json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
+ | project | string | yes | yes | nil | |
+ | dataset | string | yes | yes | nil | |
+ | table | string | yes (either `tables`) | yes | nil | |
+ | tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
+ | auto_create_table | bool | no | no | false | If true, creates table automatically |
+ | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+ | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
+ | schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
+ | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
+ | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
+ | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
+ | request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
+ | request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
+ | time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
+ | time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition(experimental feature on BigQuery). |
+ | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+ | time_partitioning_require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. (experimental feature on BigQuery) |
 
  #### bigquery_insert
 
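For orientation, a minimal sketch of a configuration exercising the new `time_partitioning_require_partition_filter` option, written as a Ruby heredoc in the same `create_driver(<<-CONFIG)` style the tests below use. All names and paths are placeholders, not values from this diff:

```ruby
# Hypothetical configuration (placeholder names and paths throughout),
# mirroring the test-driver style used elsewhere in this gem.
config = <<-CONFIG
  table foo
  email foo@bar.example
  private_key_path /path/to/key
  project yourproject_id
  dataset yourdataset_id
  schema_path /path/to/schema.json

  time_partitioning_type day
  time_partitioning_field time
  time_partitioning_expiration 1h
  time_partitioning_require_partition_filter true
CONFIG
```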
@@ -7,10 +7,9 @@ module Fluent
  RETRYABLE_STATUS_CODE = [500, 502, 503, 504]
 
  class << self
- def wrap(google_api_error, message = nil, force_unretryable: false)
- e = google_api_error
- return UnRetryableError.new(message, e) if force_unretryable
-
+ # @param e [Google::Apis::Error]
+ # @param message [String]
+ def wrap(e, message = nil)
  if retryable_error?(e)
  RetryableError.new(message, e)
  else
@@ -18,12 +17,9 @@ module Fluent
  end
  end
 
- def retryable_error?(google_api_error)
- e = google_api_error
- reason = e.respond_to?(:reason) ? e.reason : nil
-
- retryable_error_reason?(reason) ||
- (e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code))
+ # @param e [Google::Apis::Error]
+ def retryable_error?(e)
+ e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code)
  end
 
  def retryable_error_reason?(reason)
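In short, `wrap` now classifies an error by its class and HTTP status alone; the reason-string inspection and the `force_unretryable` escape hatch are gone. A minimal sketch of a caller, assuming the error classes shown above (this caller is not part of the diff):

```ruby
# Minimal caller sketch (not from this diff): wrap a Google API error and
# branch on retryability. After this change, only Google::Apis::ServerError
# with status 500/502/503/504 yields a RetryableError.
begin
  client.insert_all_table_data(project, dataset, table_id, body, {})
rescue Google::Apis::Error => e
  wrapped = Fluent::BigQuery::Error.wrap(e, "tabledata.insertAll failed")
  raise wrapped unless wrapped.retryable?
  log.warn "retryable BigQuery error, will retry", message: wrapped.message
end
```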
@@ -1,5 +1,5 @@
  module Fluent
  module BigQueryPlugin
- VERSION = "2.0.0".freeze
+ VERSION = "2.1.0".freeze
  end
  end
@@ -34,13 +34,7 @@ module Fluent
  }
  }
 
- if @options[:time_partitioning_type]
- definition[:time_partitioning] = {
- type: @options[:time_partitioning_type].to_s.upcase,
- field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
- expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil
- }.select { |_, value| !value.nil? }
- end
+ definition.merge!(time_partitioning: time_partitioning) if time_partitioning
  client.insert_table(project, dataset, definition, {})
  log.debug "create table", project_id: project, dataset: dataset, table: table_id
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -51,10 +45,9 @@ module Fluent
  return
  end
 
- reason = e.respond_to?(:reason) ? e.reason : nil
- log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message, reason: reason
+ log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message
 
- if Fluent::BigQuery::Error.retryable_error_reason?(reason) && create_table_retry_count < create_table_retry_limit
+ if create_table_retry_count < create_table_retry_limit
  sleep create_table_retry_wait
  create_table_retry_wait *= 2
  create_table_retry_count += 1
@@ -77,14 +70,19 @@ module Fluent
  nil
  end
 
- def insert_rows(project, dataset, table_id, rows, template_suffix: nil)
+ def insert_rows(project, dataset, table_id, rows, schema, template_suffix: nil)
  body = {
  rows: rows,
  skip_invalid_rows: @options[:skip_invalid_rows],
  ignore_unknown_values: @options[:ignore_unknown_values],
  }
  body.merge!(template_suffix: template_suffix) if template_suffix
- res = client.insert_all_table_data(project, dataset, table_id, body, {})
+
+ if @options[:auto_create_table]
+ res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+ else
+ res = client.insert_all_table_data(project, dataset, table_id, body, {})
+ end
  log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
 
  if res.insert_errors && !res.insert_errors.empty?
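Callers must now pass the table schema through, since the writer may need it to create a missing table. A hypothetical call (all argument values are placeholders):

```ruby
# Hypothetical call site (placeholder values): insert_rows now takes the
# schema as a positional argument so auto_create_table can use it on a 404.
writer.insert_rows("yourproject_id", "yourdataset_id", "foo",
                   rows, schema, template_suffix: nil)
```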
@@ -101,8 +99,7 @@ module Fluent
  end
  end
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
- reason = e.respond_to?(:reason) ? e.reason : nil
- error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason }
+ error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message }
  wrapped = Fluent::BigQuery::Error.wrap(e)
  if wrapped.retryable?
  log.warn "tabledata.insertAll API", error_data
@@ -132,9 +129,6 @@ module Fluent
  dataset_id: dataset,
  table_id: table_id,
  },
- schema: {
- fields: fields.to_a,
- },
  write_disposition: "WRITE_APPEND",
  source_format: source_format,
  ignore_unknown_values: @options[:ignore_unknown_values],
@@ -144,17 +138,18 @@ module Fluent
  }
 
  job_id = create_job_id(chunk_id_hex, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
- configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
  configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
 
- # If target table is already exist, omit schema configuration.
- # Because schema changing is easier.
  begin
- if client.get_table(project, dataset, table_id)
- configuration[:configuration][:load].delete(:schema)
+ # Check table existance
+ client.get_table(project, dataset, table_id)
+ rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+ if e.status_code == 404 && /Not Found: Table/i =~ e.message
+ raise Fluent::BigQuery::UnRetryableError.new("Table is not found") unless @options[:auto_create_table]
+ raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
+ configuration[:configuration][:load].merge!(schema: {fields: fields.to_a})
+ configuration[:configuration][:load].merge!(time_partitioning: time_partitioning) if time_partitioning
  end
- rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError
- raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
  end
 
  res = client.insert_job(
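The dropped `CREATE_NEVER` override is what makes this path work: a BigQuery load job's `create_disposition` defaults to `CREATE_IF_NEEDED`, so once the table lookup 404s and the schema (plus any `time_partitioning` settings) is attached, the load job itself creates the missing partitioned table. A sketch of the resulting load configuration, with hypothetical values:

```ruby
# Illustration only (hypothetical values): the load configuration produced
# when the destination table is missing and auto_create_table is enabled.
{
  configuration: {
    load: {
      destination_table: {
        project_id: "yourproject_id",
        dataset_id: "yourdataset_id",
        table_id: "foo",
      },
      write_disposition: "WRITE_APPEND",
      source_format: "NEWLINE_DELIMITED_JSON",
      schema: { fields: fields.to_a },       # attached after the 404
      time_partitioning: time_partitioning,  # attached only if configured
    }
  }
}
```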
@@ -167,19 +162,7 @@ module Fluent
  )
  JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
- reason = e.respond_to?(:reason) ? e.reason : nil
- log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
-
- if @options[:auto_create_table] && e.status_code == 404 && /Not Found: Table/i =~ e.message
- # Table Not Found: Auto Create Table
- create_table(
- project,
- dataset,
- table_id,
- fields,
- )
- raise "table created. send rows next time."
- end
+ log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message
 
  if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
  return JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, job_id)
@@ -317,6 +300,43 @@ module Fluent
  "NEWLINE_DELIMITED_JSON"
  end
  end
+
+ def time_partitioning
+ return @time_partitioning if instance_variable_defined?(:@time_partitioning)
+
+ if @options[:time_partitioning_type]
+ @time_partitioning = {
+ type: @options[:time_partitioning_type].to_s.upcase,
+ field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
+ expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil,
+ require_partition_filter: @options[:time_partitioning_require_partition_filter],
+ }.reject { |_, v| v.nil? }
+ else
+ @time_partitioning
+ end
+ end
+
+ def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+ try_count ||= 1
+ res = client.insert_all_table_data(project, dataset, table_id, body, {})
+ rescue Google::Apis::ClientError => e
+ if e.status_code == 404 && /Not Found: Table/i =~ e.message
+ if try_count == 1
+ # Table Not Found: Auto Create Table
+ create_table(project, dataset, table_id, schema)
+ elsif try_count > 10
+ raise "A new table was created but it is not found."
+ end
+
+ # Retry to insert several times because the created table is not visible from Streaming insert for a little while
+ # cf. https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
+ try_count += 1
+ sleep 5
+ log.debug "Retry to insert rows", project_id: project, dataset: dataset, table: table_id
+ retry
+ end
+ raise
+ end
  end
  end
  end
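Two things worth noting here. The `time_partitioning` helper memoizes its result and rejects nil values, so unset options simply disappear from the hash. And the streaming-insert retry loop sleeps 5 seconds between attempts with a cap of 10 retries, i.e. it waits roughly 50 seconds total for a newly created table to become visible to streaming inserts. An illustration with hypothetical option values (the `3600` corresponds to a `1h` config, converted to milliseconds):

```ruby
# Illustration (hypothetical option values): what the memoized
# time_partitioning helper returns for a typical configuration.
options = {
  time_partitioning_type: :day,
  time_partitioning_field: "time",
  time_partitioning_expiration: 3600,  # seconds (e.g. from "1h")
  time_partitioning_require_partition_filter: true,
}
# Resulting hash:
# { type: "DAY", field: "time", expiration_ms: 3_600_000,
#   require_partition_filter: true }
```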
@@ -69,6 +69,7 @@ module Fluent
  config_param :time_partitioning_type, :enum, list: [:day], default: nil
  config_param :time_partitioning_field, :string, default: nil
  config_param :time_partitioning_expiration, :time, default: nil
+ config_param :time_partitioning_require_partition_filter, :bool, default: false
 
  ## Formatter
  config_section :format do
@@ -139,8 +140,9 @@ module Fluent
  prevent_duplicate_load: @prevent_duplicate_load,
  auto_create_table: @auto_create_table,
  time_partitioning_type: @time_partitioning_type,
- time_partitioning_field: time_partitioning_field,
+ time_partitioning_field: @time_partitioning_field,
  time_partitioning_expiration: @time_partitioning_expiration,
+ time_partitioning_require_partition_filter: @time_partitioning_require_partition_filter,
  timeout_sec: @request_timeout_sec,
  open_timeout_sec: @request_open_timeout_sec,
  })
@@ -96,14 +96,8 @@ module Fluent
  end
 
  def insert(project, dataset, table_id, rows, schema, template_suffix)
- writer.insert_rows(project, dataset, table_id, rows, template_suffix: template_suffix)
+ writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
  rescue Fluent::BigQuery::Error => e
- if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
- # Table Not Found: Auto Create Table
- writer.create_table(project, dataset, table_id, schema)
- raise "table created. send rows next time."
- end
-
  raise if e.retryable?
 
  if @secondary
@@ -196,6 +196,7 @@ module Fluent
  rescue => e
  log.error("unexpected error while polling", error: e)
  log.error_backtrace
+ rollback_write(job_reference.chunk_id)
  end
  end
 
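`rollback_write` is the Fluentd v1 output API for returning an in-flight chunk to the buffer, so a chunk whose load-job polling fails unexpectedly is now retried later instead of being silently dropped. A sketch of the effect, assuming standard `Fluent::Plugin::Output` delayed-commit semantics (not code from this diff):

```ruby
# Sketch (assumed Fluent::Plugin::Output semantics, not from this diff):
# a delayed-commit plugin polls the load job and either commits or rolls
# back the chunk. Rolling back re-enqueues it for a later retry.
def poll_job(job_reference)
  # ... check job status via the BigQuery API ...
  commit_write(job_reference.chunk_id)   # success: chunk is done
rescue => e
  log.error("unexpected error while polling", error: e)
  rollback_write(job_reference.chunk_id) # failure: chunk will be retried
end
```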
@@ -121,7 +121,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  driver = create_driver
 
  stub_writer do |writer|
- mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: hash_including(entry)}], template_suffix: nil)
  mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
  rows: [{json: hash_including(entry)}],
  skip_invalid_rows: false,
@@ -346,10 +345,24 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  CONFIG
 
  stub_writer do |writer|
- mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}], template_suffix: nil) do
- raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
- end
- mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
+ body = {
+ rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
+ skip_invalid_rows: false,
+ ignore_unknown_values: false,
+ }
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+ raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+ end.at_least(1)
+ mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+ mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+ table_reference: {
+ table_id: 'foo',
+ },
+ schema: {
+ fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+ },
+ }, {})
  end
 
  assert_raise(RuntimeError) do
@@ -403,13 +416,34 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  time_partitioning_type day
  time_partitioning_field time
  time_partitioning_expiration 1h
+ time_partitioning_require_partition_filter true
  CONFIG
 
  stub_writer do |writer|
- mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) do
- raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
- end
- mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
+ body = {
+ rows: [message],
+ skip_invalid_rows: false,
+ ignore_unknown_values: false,
+ }
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+ raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+ end.at_least(1)
+ mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+ mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+ table_reference: {
+ table_id: 'foo',
+ },
+ schema: {
+ fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+ },
+ time_partitioning: {
+ type: 'DAY',
+ field: 'time',
+ expiration_ms: 3600000,
+ require_partition_filter: true
+ },
+ }, {})
  end
 
  assert_raise(RuntimeError) do
@@ -39,10 +39,8 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  writer
  end
  end
-
- def test_write
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
+ def test_write
  response_stub = stub!
 
  driver = create_driver
@@ -60,9 +58,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  dataset_id: 'yourdataset_id',
  table_id: 'foo',
  },
- schema: {
- fields: schema_fields,
- },
  write_disposition: "WRITE_APPEND",
  source_format: "NEWLINE_DELIMITED_JSON",
  ignore_unknown_values: false,
@@ -99,7 +94,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  schema_path #{SCHEMA_PATH}
  prevent_duplicate_load true
  CONFIG
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
  response_stub = stub!
  stub_writer do |writer|
@@ -116,9 +110,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  dataset_id: 'yourdataset_id',
  table_id: 'foo',
  },
- schema: {
- fields: schema_fields,
- },
  write_disposition: "WRITE_APPEND",
  source_format: "NEWLINE_DELIMITED_JSON",
  ignore_unknown_values: false,
@@ -138,7 +129,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
  def test_write_with_retryable_error
  driver = create_driver
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
  driver.instance_start
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
@@ -158,9 +148,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  dataset_id: 'yourdataset_id',
  table_id: 'foo',
  },
- schema: {
- fields: schema_fields,
- },
  write_disposition: "WRITE_APPEND",
  source_format: "NEWLINE_DELIMITED_JSON",
  ignore_unknown_values: false,
@@ -225,7 +212,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  utc
  </secondary>
  CONFIG
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
  driver.instance_start
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
@@ -245,9 +231,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  dataset_id: 'yourdataset_id',
  table_id: 'foo',
  },
- schema: {
- fields: schema_fields,
- },
  write_disposition: "WRITE_APPEND",
  source_format: "NEWLINE_DELIMITED_JSON",
  ignore_unknown_values: false,
@@ -289,6 +272,61 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  driver.instance_shutdown
  end
 
+ def test_write_with_auto_create_table
+ driver = create_driver(<<-CONFIG)
+ table foo
+ email foo@bar.example
+ private_key_path /path/to/key
+ project yourproject_id
+ dataset yourdataset_id
+
+ <buffer>
+ @type memory
+ </buffer>
+
+ <inject>
+ time_format %s
+ time_key time
+ </inject>
+
+ auto_create_table true
+ schema_path #{SCHEMA_PATH}
+ CONFIG
+
+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+ stub_writer do |writer|
+ mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') do
+ raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+ end
+
+ mock(writer.client).insert_job('yourproject_id', {
+ configuration: {
+ load: {
+ destination_table: {
+ project_id: 'yourproject_id',
+ dataset_id: 'yourdataset_id',
+ table_id: 'foo',
+ },
+ write_disposition: "WRITE_APPEND",
+ source_format: "NEWLINE_DELIMITED_JSON",
+ ignore_unknown_values: false,
+ max_bad_records: 0,
+ schema: {
+ fields: schema_fields,
+ },
+ }
+ }
+ }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+ stub!.job_reference.stub!.job_id { "dummy_job_id" }
+ end
+ end
+
+ driver.run do
+ driver.feed("tag", Time.now.to_i, {"a" => "b"})
+ end
+ end
+
  private
 
  def create_response_stub(response)
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-bigquery
  version: !ruby/object:Gem::Version
- version: 2.0.0
+ version: 2.1.0
  platform: ruby
  authors:
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-09-05 00:00:00.000000000 Z
+ date: 2018-11-05 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rake