fluent-plugin-bigquery-test 2.2.0
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +16 -0
- data/.gitignore +21 -0
- data/.travis.yml +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +13 -0
- data/README.md +602 -0
- data/Rakefile +12 -0
- data/fluent-plugin-bigquery.gemspec +29 -0
- data/gemfiles/activesupport-4.gemfile +6 -0
- data/lib/fluent/plugin/bigquery/errors.rb +84 -0
- data/lib/fluent/plugin/bigquery/helper.rb +33 -0
- data/lib/fluent/plugin/bigquery/schema.rb +281 -0
- data/lib/fluent/plugin/bigquery/version.rb +5 -0
- data/lib/fluent/plugin/bigquery/writer.rb +356 -0
- data/lib/fluent/plugin/out_bigquery_base.rb +221 -0
- data/lib/fluent/plugin/out_bigquery_insert.rb +125 -0
- data/lib/fluent/plugin/out_bigquery_load.rb +221 -0
- data/test/helper.rb +20 -0
- data/test/plugin/test_out_bigquery_base.rb +579 -0
- data/test/plugin/test_out_bigquery_insert.rb +544 -0
- data/test/plugin/test_out_bigquery_load.rb +348 -0
- data/test/plugin/test_record_schema.rb +186 -0
- data/test/plugin/testdata/apache.schema +98 -0
- data/test/plugin/testdata/json_key.json +7 -0
- data/test/plugin/testdata/sudo.schema +27 -0
- data/test/run_test.rb +9 -0
- metadata +197 -0
data/lib/fluent/plugin/out_bigquery_insert.rb
ADDED
@@ -0,0 +1,125 @@

require 'fluent/plugin/out_bigquery_base'

module Fluent
  module Plugin
    class BigQueryInsertOutput < BigQueryBaseOutput
      Fluent::Plugin.register_output('bigquery_insert', self)

      helpers :record_accessor

      # template_suffix (only insert)
      # https://cloud.google.com/bigquery/streaming-data-into-bigquery#template_table_details
      config_param :template_suffix, :string, default: nil

      # skip_invalid_rows (only insert)
      # Insert all valid rows of a request, even if invalid rows exist.
      # The default value is false, which causes the entire request to fail if any invalid rows exist.
      config_param :skip_invalid_rows, :bool, default: false

      # insert_id_field (only insert)
      config_param :insert_id_field, :string, default: nil

      # add_insert_timestamp (only insert)
      # adds a timestamp just before sending the rows to bigquery, so that
      # buffering time is not taken into account. Gives a field in bigquery
      # which represents the insert time of the row.
      config_param :add_insert_timestamp, :string, default: nil

      # allow_retry_insert_errors (only insert)
      # If insert_id_field is not specified, true means to allow duplicate rows
      config_param :allow_retry_insert_errors, :bool, default: false

      ## Buffer
      config_section :buffer do
        config_set_default :@type, "memory"
        config_set_default :flush_mode, :interval
        config_set_default :flush_interval, 1
        config_set_default :flush_thread_interval, 0.05
        config_set_default :flush_thread_burst_interval, 0.05
        config_set_default :chunk_limit_size, 1 * 1024 ** 2 # 1MB
        config_set_default :total_limit_size, 1 * 1024 ** 3 # 1GB
        config_set_default :chunk_limit_records, 500
      end

      def configure(conf)
        super

        if @insert_id_field
          if @insert_id_field !~ /^\$[\[\.]/ && @insert_id_field =~ /\./
            warn "[BREAKING CHANGE] insert_id_field format is changed. Use fluentd record_accessor helper. (https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor)"
          end
          @get_insert_id = record_accessor_create(@insert_id_field)
        end

        formatter_config = conf.elements("format")[0]
        if formatter_config && formatter_config['@type'] != "json"
          raise ConfigError, "`bigquery_insert` supports only json formatter."
        end
        @formatter = formatter_create(usage: 'out_bigquery_for_insert', type: 'json', conf: formatter_config)

        placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}/template_suffix=#{@template_suffix}"
        placeholder_validate!(:bigquery_insert, placeholder_params)
      end

      # for Fluent::Plugin::Output#implement? method
      def format(tag, time, record)
        super
      end

      def write(chunk)
        table_format = @tables_mutex.synchronize do
          t = @tables_queue.shift
          @tables_queue.push t
          t
        end

        now = Time.now.utc.strftime("%Y-%m-%d %H:%M:%S.%6N") if @add_insert_timestamp

        rows = chunk.open do |io|
          io.map do |line|
            record = MultiJson.load(line)
            record[@add_insert_timestamp] = now if @add_insert_timestamp
            row = {"json" => record}
            row["insert_id"] = @get_insert_id.call(record) if @get_insert_id
            Fluent::BigQuery::Helper.deep_symbolize_keys(row)
          end
        end

        metadata = chunk.metadata
        project = extract_placeholders(@project, metadata)
        dataset = extract_placeholders(@dataset, metadata)
        table_id = extract_placeholders(table_format, metadata)
        template_suffix = @template_suffix ? extract_placeholders(@template_suffix, metadata) : nil
        schema = get_schema(project, dataset, metadata)

        insert(project, dataset, table_id, rows, schema, template_suffix)
      end

      def insert(project, dataset, table_id, rows, schema, template_suffix)
        writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
      rescue Fluent::BigQuery::Error => e
        raise if e.retryable?

        if @secondary
          # TODO: find better way
          @retry = retry_state_create(
            :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
            forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
            max_interval: @buffer_config.retry_max_interval,
            secondary: true, secondary_threshold: Float::EPSILON,
            randomize: @buffer_config.retry_randomize
          )
        else
          @retry = retry_state_create(
            :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
            forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
            max_interval: @buffer_config.retry_max_interval,
            randomize: @buffer_config.retry_randomize
          )
        end

        raise
      end
    end
  end
end
data/lib/fluent/plugin/out_bigquery_load.rb
ADDED
@@ -0,0 +1,221 @@

require 'fluent/plugin/out_bigquery_base'

module Fluent
  module Plugin
    class BigQueryLoadOutput < BigQueryBaseOutput
      Fluent::Plugin.register_output('bigquery_load', self)

      helpers :timer

      config_param :source_format, :enum, list: [:json, :avro, :csv], default: :json

      # max_bad_records (only load)
      # The maximum number of bad records that BigQuery can ignore when running the job.
      # If the number of bad records exceeds this value, an invalid error is returned in the job result.
      # The default value is 0, which requires that all records are valid.
      config_param :max_bad_records, :integer, default: 0

      # prevent_duplicate_load (only load)
      config_param :prevent_duplicate_load, :bool, default: false

      config_param :use_delayed_commit, :bool, default: true
      config_param :wait_job_interval, :time, default: 3

      ## Buffer
      config_section :buffer do
        config_set_default :@type, "file"
        config_set_default :flush_mode, :interval
        config_set_default :flush_interval, 3600 # 1h
        config_set_default :flush_thread_interval, 5
        config_set_default :flush_thread_burst_interval, 5
        config_set_default :chunk_limit_size, 1 * 1024 ** 3 # 1GB
        config_set_default :total_limit_size, 32 * 1024 ** 3 # 32GB

        config_set_default :delayed_commit_timeout, 1800 # 30m
      end

      def configure(conf)
        super

        placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}"
        placeholder_validate!(:bigquery_load, placeholder_params)
      end

      def start
        super

        if prefer_delayed_commit
          @polling_targets = []
          @polling_mutex = Mutex.new
          log.debug("start load job polling")
          timer_execute(:polling_bigquery_load_job, @wait_job_interval, &method(:poll))
        end
      end

      def prefer_delayed_commit
        @use_delayed_commit
      end

      # for Fluent::Plugin::Output#implement? method
      def format(tag, time, record)
        super
      end

      def write(chunk)
        job_reference = do_write(chunk)

        until response = writer.fetch_load_job(job_reference)
          sleep @wait_job_interval
        end

        writer.commit_load_job(job_reference.chunk_id_hex, response)
      rescue Fluent::BigQuery::Error => e
        raise if e.retryable?

        @retry_mutex.synchronize do
          if @secondary
            # TODO: find better way
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              secondary: true, secondary_threshold: Float::EPSILON,
              randomize: @buffer_config.retry_randomize
            )
          else
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              randomize: @buffer_config.retry_randomize
            )
          end
        end

        raise
      end

      def try_write(chunk)
        job_reference = do_write(chunk)
        @polling_mutex.synchronize do
          @polling_targets << job_reference
        end
      rescue Fluent::BigQuery::Error => e
        raise if e.retryable?

        @retry_mutex.synchronize do
          if @secondary
            # TODO: find better way
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              secondary: true, secondary_threshold: Float::EPSILON,
              randomize: @buffer_config.retry_randomize
            )
          else
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              randomize: @buffer_config.retry_randomize
            )
          end
        end

        raise
      end

      private

      def do_write(chunk)
        table_format = @tables_mutex.synchronize do
          t = @tables_queue.shift
          @tables_queue.push t
          t
        end

        metadata = chunk.metadata
        project = extract_placeholders(@project, metadata)
        dataset = extract_placeholders(@dataset, metadata)
        table_id = extract_placeholders(table_format, metadata)
        schema = get_schema(project, dataset, metadata)

        create_upload_source(chunk) do |upload_source|
          writer.create_load_job(chunk.unique_id, dump_unique_id_hex(chunk.unique_id), project, dataset, table_id, upload_source, schema)
        end
      end

      def poll
        job_reference = @polling_mutex.synchronize do
          @polling_targets.shift
        end
        return unless job_reference

        begin
          response = writer.fetch_load_job(job_reference)
          if response
            writer.commit_load_job(job_reference.chunk_id_hex, response)
            commit_write(job_reference.chunk_id)
            log.debug("commit chunk", chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
          else
            @polling_mutex.synchronize do
              @polling_targets << job_reference
            end
          end
        rescue Fluent::BigQuery::Error => e
          # RetryableError comes from only `commit_load_job`
          # if error is retryable, takeback chunk and do next `try_flush`
          # if error is not retryable, create custom retry_state and takeback chunk do next `try_flush`
          if e.retryable?
            log.warn("failed to poll load job", error: e, chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
          else
            log.error("failed to poll load job", error: e, chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
            @retry_mutex.synchronize do
              if @secondary
                # TODO: find better way
                @retry = retry_state_create(
                  :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
                  forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
                  max_interval: @buffer_config.retry_max_interval,
                  secondary: true, secondary_threshold: Float::EPSILON,
                  randomize: @buffer_config.retry_randomize
                )
              else
                @retry = retry_state_create(
                  :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
                  forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
                  max_interval: @buffer_config.retry_max_interval,
                  randomize: @buffer_config.retry_randomize
                )
              end
            end
          end

          rollback_write(job_reference.chunk_id)
        rescue => e
          log.error("unexpected error while polling", error: e)
          log.error_backtrace
          rollback_write(job_reference.chunk_id)
        end
      end

      def create_upload_source(chunk)
        chunk_is_file = @buffer_config["@type"] == 'file'
        if chunk_is_file
          File.open(chunk.path) do |file|
            yield file
          end
        else
          Tempfile.open("chunk-tmp") do |file|
            file.binmode
            chunk.write_to(file)
            file.sync
            file.rewind
            yield file
          end
        end
      end
    end
  end
end
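
bigquery_load defaults to a file buffer, hourly flushes, and delayed commit: try_write registers the load job and the timer-driven poll commits or rolls back the chunk once the job finishes. A matching stanza might look like the following sketch; the project, dataset, table, key path, and buffer path are placeholders:

<match dummy>
  @type bigquery_load

  auth_method json_key
  json_key /path/to/service_account.json   # placeholder path

  project yourproject_id
  dataset yourdataset_id
  table   tablename

  fetch_schema true

  <buffer>
    @type file
    path /var/log/fluentd/bigquery_load.*.buffer   # placeholder path
    flush_interval 3600
  </buffer>
</match>

Keeping the default file buffer matters for throughput here: create_upload_source uploads the chunk file directly when the buffer is file-backed, and otherwise has to copy the chunk into a Tempfile first.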
data/test/helper.rb
ADDED
@@ -0,0 +1,20 @@

require 'bundler/setup'
require 'test/unit'

$LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
$LOAD_PATH.unshift(__dir__)
require 'fluent/test'

require 'fluent/plugin/buffer'
require 'fluent/plugin/buf_memory'
require 'fluent/plugin/buf_file'
require 'fluent/test/driver/output'

require 'fluent/plugin/out_bigquery_base'
require 'fluent/plugin/out_bigquery_insert'
require 'fluent/plugin/out_bigquery_load'
require 'google/apis/bigquery_v2'
require 'google/api_client/auth/key_utils'
require 'googleauth'

require 'test/unit/rr'
data/test/plugin/test_out_bigquery_base.rb
ADDED
@@ -0,0 +1,579 @@

require 'helper'

class BigQueryBaseOutputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  CONFIG = %[
    table foo
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    <inject>
    time_format %s
    time_key time
    </inject>

    schema [
      {"name": "time", "type": "INTEGER"},
      {"name": "status", "type": "INTEGER"},
      {"name": "bytes", "type": "INTEGER"},
      {"name": "vhost", "type": "STRING"},
      {"name": "path", "type": "STRING"},
      {"name": "method", "type": "STRING"},
      {"name": "protocol", "type": "STRING"},
      {"name": "agent", "type": "STRING"},
      {"name": "referer", "type": "STRING"},
      {"name": "remote", "type": "RECORD", "fields": [
        {"name": "host", "type": "STRING"},
        {"name": "ip", "type": "STRING"},
        {"name": "user", "type": "STRING"}
      ]},
      {"name": "requesttime", "type": "FLOAT"},
      {"name": "bot_access", "type": "BOOLEAN"},
      {"name": "loginsession", "type": "BOOLEAN"}
    ]
  ]

  API_SCOPE = "https://www.googleapis.com/auth/bigquery"

  def create_driver(conf = CONFIG)
    Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryBaseOutput).configure(conf)
  end

  def stub_writer(stub_auth: true)
    stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
      stub(writer).get_auth { nil } if stub_auth
      yield writer
      writer
    end
  end

  private def sudo_schema_response
    {
      "schema" => {
        "fields" => [
          {
            "name" => "time",
            "type" => "TIMESTAMP",
            "mode" => "REQUIRED"
          },
          {
            "name" => "tty",
            "type" => "STRING",
            "mode" => "NULLABLE"
          },
          {
            "name" => "pwd",
            "type" => "STRING",
            "mode" => "REQUIRED"
          },
          {
            "name" => "user",
            "type" => "STRING",
            "mode" => "REQUIRED"
          },
          {
            "name" => "argv",
            "type" => "STRING",
            "mode" => "REPEATED"
          }
        ]
      }
    }
  end

  def test_configure_table
    driver = create_driver
    assert_equal driver.instance.table, 'foo'
    assert_nil driver.instance.tables

    driver = create_driver(CONFIG.sub(/\btable\s+.*$/, 'tables foo,bar'))
    assert_nil driver.instance.table
    assert_equal driver.instance.tables, ['foo', 'bar']

    assert_raise(Fluent::ConfigError, "'table' or 'tables' must be specified, and both are invalid") {
      create_driver(CONFIG + "tables foo,bar")
    }
  end

  def test_configure_auth_private_key
    driver = create_driver
    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_private_key { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_compute_engine
    driver = create_driver(%[
      table foo
      auth_method compute_engine
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_compute_engine { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_json_key_as_file
    driver = create_driver(%[
      table foo
      auth_method json_key
      json_key jsonkey.josn
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_json_key { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_json_key_as_file_raise_permission_error
    json_key_path = 'test/plugin/testdata/json_key.json'
    json_key_path_dir = File.dirname(json_key_path)

    begin
      File.chmod(0000, json_key_path_dir)

      driver = create_driver(%[
        table foo
        auth_method json_key
        json_key #{json_key_path}
        project yourproject_id
        dataset yourdataset_id
        schema [
          {"name": "time", "type": "INTEGER"},
          {"name": "status", "type": "INTEGER"},
          {"name": "bytes", "type": "INTEGER"}
        ]
      ])
      assert_raises(Errno::EACCES) do
        driver.instance.writer.client
      end
    ensure
      File.chmod(0755, json_key_path_dir)
    end
  end

  def test_configure_auth_json_key_as_string
    json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
    json_key_io = StringIO.new(json_key)
    authorization = Object.new
    stub(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == JSON.parse(json_key_io.read) }, scope: API_SCOPE) { authorization }

    driver = create_driver(%[
      table foo
      auth_method json_key
      json_key #{json_key}
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])
    stub_writer(stub_auth: false) do |writer|
      mock.proxy(writer).get_auth_from_json_key { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_application_default
    driver = create_driver(%[
      table foo
      auth_method application_default
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock.proxy(writer).get_auth_from_application_default { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_format
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "status" => "1",
      "bytes" => 3.0,
      "vhost" => :bar,
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/0.9",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "requesttime" => (now - 1).to_f.to_s,
      "bot_access" => true,
      "loginsession" => false,
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      }
    }
    expected = {
      "time" => now.to_i,
      "status" => 1,
      "bytes" => 3,
      "vhost" => "bar",
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/0.9",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "requesttime" => (now - 1).to_f.to_s.to_f,
      "bot_access" => true,
      "loginsession" => false,
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      }
    }

    driver = create_driver(CONFIG)
    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  [
    # <time_format>, <time field type>, <time expectation generator>, <assertion>
    [
      "%s.%6N",
      lambda{|t| t.strftime("%s.%6N").to_f },
      lambda{|recv, expected, actual|
        recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
      }
    ],
    [
      "%Y-%m-%dT%H:%M:%S%:z",
      lambda{|t| t.iso8601 },
      :assert_equal.to_proc
    ],
  ].each do |format, expect_time, assert|
    define_method("test_time_formats_#{format}") do
      now = Fluent::Engine.now
      input = {}
      expected = { "time" => expect_time[Time.at(now.to_r)] }

      driver = create_driver(<<-CONFIG)
        table foo
        email foo@bar.example
        private_key_path /path/to/key
        project yourproject_id
        dataset yourdataset_id

        <inject>
        time_format #{format}
        time_type string
        time_key time
        </inject>

        schema [
          {"name": "metadata", "type": "RECORD", "fields": [
            {"name": "time", "type": "INTEGER"},
            {"name": "node", "type": "STRING"}
          ]},
          {"name": "log", "type": "STRING"}
        ]
      CONFIG

      buf = nil
      driver.run { buf = driver.instance.format("my.tag", now, input) }

      assert[self, expected["time"], MultiJson.load(buf)["time"]]
    end
  end

  def test_format_with_schema
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "request" => {
        "vhost" => :bar,
        "path" => "/path/to/baz",
        "method" => "GET",
        "protocol" => "HTTP/0.9",
        "agent" => "libwww",
        "referer" => "http://referer.example",
        "time" => (now - 1).to_f,
        "bot_access" => true,
        "loginsession" => false,
      },
      "response" => {
        "status" => "1",
        "bytes" => 3.0,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      },
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
    }
    expected = {
      "time" => now.to_f,
      "request" => {
        "vhost" => "bar",
        "path" => "/path/to/baz",
        "method" => "GET",
        "protocol" => "HTTP/0.9",
        "agent" => "libwww",
        "referer" => "http://referer.example",
        "time" => (now - 1).to_f,
        "bot_access" => true,
        "loginsession" => false,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      },
      "response" => {
        "status" => 1,
        "bytes" => 3,
      },
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  def test_format_repeated_field_with_schema
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_f,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  def test_format_fetch_from_bigquery_api
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_i,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      fetch_schema true
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    stub_writer do |writer|
      mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
        sudo_schema_response["schema"]["fields"]
      end
    end

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)

    table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
    assert table_schema["time"]
    assert_equal :timestamp, table_schema["time"].type
    assert_equal :required, table_schema["time"].mode

    assert table_schema["tty"]
    assert_equal :string, table_schema["tty"].type
    assert_equal :nullable, table_schema["tty"].mode

    assert table_schema["pwd"]
    assert_equal :string, table_schema["pwd"].type
    assert_equal :required, table_schema["pwd"].mode

    assert table_schema["user"]
    assert_equal :string, table_schema["user"].type
    assert_equal :required, table_schema["user"].mode

    assert table_schema["argv"]
    assert_equal :string, table_schema["argv"].type
    assert_equal :repeated, table_schema["argv"].mode
  end

  def test_format_fetch_from_bigquery_api_with_fetch_schema_table
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_i,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo_%Y_%m_%d
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      fetch_schema true
      fetch_schema_table foo
      schema [{"name": "time", "type": "INTEGER"}]

      <buffer time>
      timekey 1d
      </buffer>
    CONFIG

    stub_writer do |writer|
      mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
        sudo_schema_response["schema"]["fields"]
      end
    end

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)

    table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
    assert table_schema["time"]
    assert_equal :timestamp, table_schema["time"].type
    assert_equal :required, table_schema["time"].mode

    assert table_schema["tty"]
    assert_equal :string, table_schema["tty"].type
    assert_equal :nullable, table_schema["tty"].mode

    assert table_schema["pwd"]
    assert_equal :string, table_schema["pwd"].type
    assert_equal :required, table_schema["pwd"].mode

    assert table_schema["user"]
    assert_equal :string, table_schema["user"].type
    assert_equal :required, table_schema["user"].mode

    assert table_schema["argv"]
    assert_equal :string, table_schema["argv"].type
    assert_equal :repeated, table_schema["argv"].mode
  end
end