fluent-plugin-bigquery-test 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,348 @@
+ require 'helper'
+
+ class BigQueryLoadOutputTest < Test::Unit::TestCase
+   def setup
+     Fluent::Test.setup
+   end
+
+   SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
+   CONFIG = %[
+     table foo
+     email foo@bar.example
+     private_key_path /path/to/key
+     project yourproject_id
+     dataset yourdataset_id
+
+     <buffer>
+       @type memory
+     </buffer>
+
+     <inject>
+       time_format %s
+       time_key time
+     </inject>
+
+     schema_path #{SCHEMA_PATH}
+     wait_job_interval 0.1
+   ]
+
+   API_SCOPE = "https://www.googleapis.com/auth/bigquery"
+
+   def create_driver(conf = CONFIG)
+     Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryLoadOutput).configure(conf)
+   end
+
+   def stub_writer(stub_auth: true)
+     stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
+       stub(writer).get_auth { nil } if stub_auth
+       yield writer
+       writer
+     end
+   end
+
+   def test_write
+     response_stub = stub!
+
+     driver = create_driver
+     stub_writer do |writer|
+       mock(writer).fetch_load_job(is_a(Fluent::BigQuery::Writer::JobReference)) { response_stub }
+       mock(writer).commit_load_job(is_a(String), response_stub)
+
+       mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+       mock(writer.client).insert_job('yourproject_id', {
+         configuration: {
+           load: {
+             destination_table: {
+               project_id: 'yourproject_id',
+               dataset_id: 'yourdataset_id',
+               table_id: 'foo',
+             },
+             write_disposition: "WRITE_APPEND",
+             source_format: "NEWLINE_DELIMITED_JSON",
+             ignore_unknown_values: false,
+             max_bad_records: 0,
+           }
+         }
+       }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+         stub!.job_reference.stub!.job_id { "dummy_job_id" }
+       end
+     end
+
+     driver.run do
+       driver.feed("tag", Time.now.to_i, {"a" => "b"})
+     end
+   end
+
+   def test_write_with_prevent_duplicate_load
+     driver = create_driver(<<-CONFIG)
+       table foo
+       email foo@bar.example
+       private_key_path /path/to/key
+       project yourproject_id
+       dataset yourdataset_id
+
+       <buffer>
+         @type memory
+       </buffer>
+
+       <inject>
+         time_format %s
+         time_key time
+       </inject>
+
+       schema_path #{SCHEMA_PATH}
+       prevent_duplicate_load true
+     CONFIG
+
+     response_stub = stub!
+     stub_writer do |writer|
+       mock(writer).fetch_load_job(is_a(Fluent::BigQuery::Writer::JobReference)) { response_stub }
+       mock(writer).commit_load_job(is_a(String), response_stub)
+
+       mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+       mock(writer.client).insert_job('yourproject_id', {
+         configuration: {
+           load: {
+             destination_table: {
+               project_id: 'yourproject_id',
+               dataset_id: 'yourdataset_id',
+               table_id: 'foo',
+             },
+             write_disposition: "WRITE_APPEND",
+             source_format: "NEWLINE_DELIMITED_JSON",
+             ignore_unknown_values: false,
+             max_bad_records: 0,
+           },
+         },
+         job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/}},
+       }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+         stub!.job_reference.stub!.job_id { "dummy_job_id" }
+       end
+     end
+
+     driver.run do
+       driver.feed("tag", Time.now.to_i, {"a" => "b"})
+     end
+   end
+
+   def test_write_with_retryable_error
+     driver = create_driver
+
+     driver.instance_start
+     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
+     metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
+     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
+       c.append([driver.instance.format(tag, time, record)])
+     end
+
+     stub_writer do |writer|
+       mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+       mock(writer.client).insert_job('yourproject_id', {
+         configuration: {
+           load: {
+             destination_table: {
+               project_id: 'yourproject_id',
+               dataset_id: 'yourdataset_id',
+               table_id: 'foo',
+             },
+             write_disposition: "WRITE_APPEND",
+             source_format: "NEWLINE_DELIMITED_JSON",
+             ignore_unknown_values: false,
+             max_bad_records: 0,
+           }
+         }
+       }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+         stub!.job_reference.stub!.job_id { "dummy_job_id" }
+       end
+
+       mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+         stub! do |s|
+           s.id { 'dummy_job_id' }
+           s.configuration.stub! do |_s|
+             _s.load.stub! do |__s|
+               __s.destination_table.stub! do |___s|
+                 ___s.project_id { 'yourproject_id' }
+                 ___s.dataset_id { 'yourdataset_id' }
+                 ___s.table_id { 'foo' }
+               end
+             end
+           end
+           s.status.stub! do |_s|
+             _s.state { 'DONE' }
+             _s.errors { [] }
+             _s.error_result.stub! do |__s|
+               __s.message { 'error' }
+               __s.reason { 'backendError' }
+             end
+           end
+         end
+       end
+     end
+
+     assert_raise Fluent::BigQuery::RetryableError do
+       driver.instance.write(chunk)
+     end
+     driver.instance_shutdown
+   end
+
+   def test_write_with_not_retryable_error
+     driver = create_driver(<<-CONFIG)
+       table foo
+       email foo@bar.example
+       private_key_path /path/to/key
+       project yourproject_id
+       dataset yourdataset_id
+
+       <buffer>
+         @type memory
+       </buffer>
+
+       <inject>
+         time_format %s
+         time_key time
+       </inject>
+
+       schema_path #{SCHEMA_PATH}
+       <secondary>
+         @type file
+         path error
+         utc
+       </secondary>
+     CONFIG
+
+     driver.instance_start
+     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
+     metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
+     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
+       c.append([driver.instance.format(tag, time, record)])
+     end
+
+     stub_writer do |writer|
+       mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+       mock(writer.client).insert_job('yourproject_id', {
+         configuration: {
+           load: {
+             destination_table: {
+               project_id: 'yourproject_id',
+               dataset_id: 'yourdataset_id',
+               table_id: 'foo',
+             },
+             write_disposition: "WRITE_APPEND",
+             source_format: "NEWLINE_DELIMITED_JSON",
+             ignore_unknown_values: false,
+             max_bad_records: 0,
+           }
+         }
+       }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+         stub!.job_reference.stub!.job_id { "dummy_job_id" }
+       end
+
+       mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+         stub! do |s|
+           s.id { 'dummy_job_id' }
+           s.configuration.stub! do |_s|
+             _s.load.stub! do |__s|
+               __s.destination_table.stub! do |___s|
+                 ___s.project_id { 'yourproject_id' }
+                 ___s.dataset_id { 'yourdataset_id' }
+                 ___s.table_id { 'foo' }
+               end
+             end
+           end
+           s.status.stub! do |_s|
+             _s.state { 'DONE' }
+             _s.errors { [] }
+             _s.error_result.stub! do |__s|
+               __s.message { 'error' }
+               __s.reason { 'invalid' }
+             end
+           end
+         end
+       end
+     end
+
+     assert_raise Fluent::BigQuery::UnRetryableError do
+       driver.instance.write(chunk)
+     end
+     assert_in_delta driver.instance.retry.secondary_transition_at, Time.now, 0.1
+     driver.instance_shutdown
+   end
+
+   def test_write_with_auto_create_table
+     driver = create_driver(<<-CONFIG)
+       table foo
+       email foo@bar.example
+       private_key_path /path/to/key
+       project yourproject_id
+       dataset yourdataset_id
+
+       <buffer>
+         @type memory
+       </buffer>
+
+       <inject>
+         time_format %s
+         time_key time
+       </inject>
+
+       auto_create_table true
+       schema_path #{SCHEMA_PATH}
+     CONFIG
+
+     schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+     stub_writer do |writer|
+       mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') do
+         raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+       end
+
+       mock(writer.client).insert_job('yourproject_id', {
+         configuration: {
+           load: {
+             destination_table: {
+               project_id: 'yourproject_id',
+               dataset_id: 'yourdataset_id',
+               table_id: 'foo',
+             },
+             write_disposition: "WRITE_APPEND",
+             source_format: "NEWLINE_DELIMITED_JSON",
+             ignore_unknown_values: false,
+             max_bad_records: 0,
+             schema: {
+               fields: schema_fields,
+             },
+           }
+         }
+       }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+         stub!.job_reference.stub!.job_id { "dummy_job_id" }
+       end
+     end
+
+     driver.run do
+       driver.feed("tag", Time.now.to_i, {"a" => "b"})
+     end
+   end
+
+   private
+
+   def create_response_stub(response)
+     case response
+     when Hash
+       root = stub!
+       response.each do |k, v|
+         root.__send__(k) do
+           create_response_stub(v)
+         end
+       end
+       root
+     when Array
+       response.map { |item| create_response_stub(item) }
+     else
+       response
+     end
+   end
+ end
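
None of the load-output tests above touch the network: RR's stub.proxy(Fluent::BigQuery::Writer).new lets the real constructor run, then hands each new instance to a block where get_auth and the Google API client calls are mocked out, and the block returns the instance. A rough, self-contained sketch of that proxy-stub pattern, assuming the rr and test-unit-rr gems are installed (Api and fetch are hypothetical stand-ins, not part of this plugin):

    require 'test/unit'
    require 'test/unit/rr'

    # Hypothetical collaborator standing in for Fluent::BigQuery::Writer.
    class Api
      def fetch
        raise "would hit the network"
      end
    end

    class ProxyStubExampleTest < Test::Unit::TestCase
      def test_proxy_stub
        # Let Api.new run for real, but intercept every instance it returns,
        # mirroring the stub_writer helper above.
        stub.proxy(Api).new.with_any_args do |api|
          stub(api).fetch { "stubbed" }  # swap out the network call
          api                            # the proxy block must return the instance
        end

        assert_equal("stubbed", Api.new.fetch)
      end
    end
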
@@ -0,0 +1,186 @@
+ require 'helper'
+
+ class RecordSchemaTest < Test::Unit::TestCase
+   def base_schema
+     [
+       {
+         "name" => "time",
+         "type" => "TIMESTAMP",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "tty",
+         "type" => "STRING",
+         "mode" => "NULLABLE"
+       },
+       {
+         "name" => "pwd",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "user",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "argv",
+         "type" => "STRING",
+         "mode" => "REPEATED"
+       },
+       {
+         "name" => "utilisation",
+         "type" => "NUMERIC",
+         "mode" => "NULLABLE"
+       }
+     ]
+   end
+
+   def base_schema_with_new_column
+     [
+       {
+         "name" => "time",
+         "type" => "TIMESTAMP",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "tty",
+         "type" => "STRING",
+         "mode" => "NULLABLE"
+       },
+       {
+         "name" => "pwd",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "user",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "argv",
+         "type" => "STRING",
+         "mode" => "REPEATED"
+       },
+       {
+         "name" => "utilisation",
+         "type" => "NUMERIC",
+         "mode" => "NULLABLE"
+       },
+       {
+         "name" => "new_column",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       }
+     ]
+   end
+
+   def base_schema_with_type_changed_column
+     [
+       {
+         "name" => "time",
+         "type" => "INTEGER", # change type
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "tty",
+         "type" => "STRING",
+         "mode" => "NULLABLE"
+       },
+       {
+         "name" => "pwd",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "user",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "argv",
+         "type" => "STRING",
+         "mode" => "REPEATED"
+       },
+       {
+         "name" => "utilisation",
+         "type" => "NUMERIC",
+         "mode" => "NULLABLE"
+       }
+     ]
+   end
+
+   def test_load_schema
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+     assert { Fluent::BigQuery::Helper.deep_stringify_keys(fields.to_a) == base_schema }
+   end
+
+   def test_load_schema_allow_overwrite_with_type_changed_column
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+
+     fields.load_schema(base_schema_with_type_changed_column)
+     assert { Fluent::BigQuery::Helper.deep_stringify_keys(fields.to_a) == base_schema_with_type_changed_column }
+   end
+
+   def test_load_schema_allow_overwrite_with_new_column
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+
+     fields.load_schema(base_schema_with_new_column)
+     assert { Fluent::BigQuery::Helper.deep_stringify_keys(fields.to_a) == base_schema_with_new_column }
+   end
+
+   def test_format_one
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+
+     time = Time.local(2016, 2, 7, 19, 0, 0).utc
+
+     formatted = fields.format_one({
+       "time" => time, "tty" => nil, "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", 42]
+     })
+     assert_equal(
+       formatted,
+       {
+         "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", "42"]
+       }
+     )
+   end
+
+   def test_format_one_convert_array_or_hash_to_json
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+
+     time = Time.local(2016, 2, 7, 19, 0, 0).utc
+
+     formatted = fields.format_one({
+       "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilisation" => "0.837"
+     })
+     assert_equal(
+       formatted,
+       {
+         "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilisation" => "0.837"
+       }
+     )
+   end
+
+   def test_format_one_with_extra_column
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+
+     time = Time.local(2016, 2, 7, 19, 0, 0).utc
+
+     formatted = fields.format_one({
+       "time" => time, "tty" => nil, "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", 42.195], "extra" => "extra_data"
+     })
+     assert_equal(
+       formatted,
+       {
+         "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", "42.195"], "extra" => "extra_data"
+       }
+     )
+   end
+ end
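
Taken together, the format_one assertions above pin down RecordSchema's coercion rules: TIMESTAMP values are rendered through strftime, non-string values in STRING columns are stringified (arrays and hashes via MultiJson.dump), NULLABLE fields that are nil are dropped from the output, and keys absent from the schema pass through unchanged. A minimal sketch of exercising the same API directly, assuming the gem's lib directory is on the load path and that fluent/plugin/bigquery/schema is the file defining RecordSchema:

    require 'fluent/plugin/bigquery/schema'
    require 'multi_json'

    # Two-column schema in the same hash form the tests build by hand.
    schema = Fluent::BigQuery::RecordSchema.new("record")
    schema.load_schema([
      { "name" => "user", "type" => "STRING", "mode" => "REQUIRED" },
      { "name" => "argv", "type" => "STRING", "mode" => "REPEATED" },
    ])

    # A Hash in a STRING column becomes JSON; each element of the
    # REPEATED STRING column is stringified individually.
    row = schema.format_one({ "user" => { name: "joker1007" }, "argv" => ["foo", 42] })
    # expected: {"user" => "{\"name\":\"joker1007\"}", "argv" => ["foo", "42"]}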