fluent-plugin-bigquery 2.2.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +30 -0
- data/.github/workflows/windows.yml +30 -0
- data/Gemfile +3 -0
- data/README.md +8 -4
- data/integration/README.md +14 -0
- data/integration/create_table.sh +4 -0
- data/integration/dummer_insert.rb +12 -0
- data/integration/dummer_load.rb +12 -0
- data/integration/fluent.conf +88 -0
- data/integration/schema.json +22 -0
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +17 -8
- data/lib/fluent/plugin/out_bigquery_base.rb +28 -5
- data/lib/fluent/plugin/out_bigquery_insert.rb +5 -0
- data/test/plugin/test_out_bigquery_base.rb +22 -27
- data/test/plugin/test_out_bigquery_insert.rb +60 -26
- data/test/plugin/test_out_bigquery_load.rb +9 -9
- metadata +14 -7
- data/.travis.yml +0 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc6de961c8c42fddf3d9e297e93db560d16cfc098161232c90ee64f0a5679fee
+  data.tar.gz: 5ec1fee690f77d0fa25d8e427c6ad354cdfdbfafe30a4aee4fea9a5e73db5eb3
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 20fc96d420611a0d12f7cb34656ae87872f24131c70039383a8f8b7d51048a7d4f277a80675f2bee834113fd13d2a9780b772b517f2140481f7fb86ce63f24e3
+  data.tar.gz: cecc8f8682761ddfb22d942b69103823cc728923f6d7043d967254ed02c754db4e792132769f7f3aa91986aa27895ac83bf16358be21e03d3c94e77c43975231
data/.github/workflows/linux.yml
ADDED
@@ -0,0 +1,30 @@
+name: Testing on Ubuntu
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby:
+          - 2.7
+          - 3.0
+          - 3.1
+        os:
+          - ubuntu-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/.github/workflows/windows.yml
ADDED
@@ -0,0 +1,30 @@
+name: Testing on Windows
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby:
+          - 2.7
+          - 3.0
+          - 3.1
+        os:
+          - windows-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -23,6 +23,7 @@ OAuth flow for installed applications.
 | v0.4.x | 0.12.x | 2.0 or later |
 | v1.x.x | 0.14.x or later | 2.2 or later |
 | v2.x.x | 0.14.x or later | 2.3 or later |
+| v3.x.x | 1.x or later | 2.7 or later |
 
 ## With docker image
 If you use official alpine based fluentd docker image (https://github.com/fluent/fluentd-docker-image),
@@ -52,7 +53,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | auto_create_table | bool | no | no | false | If true, creates table automatically |
 | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
 | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
-| schema_path | string | yes (either `fetch_schema`) |
+| schema_path | string | yes (either `fetch_schema`) | yes | nil | Schema Definition file path. It is formatted by JSON. |
 | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
 | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
 | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
@@ -72,6 +73,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
 | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
 | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+| require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. |
 
 #### bigquery_load
 
@@ -379,10 +381,10 @@ format to construct table ids.
 Table ids are formatted at runtime
 using the chunk key time.
 
-see.
+see. https://docs.fluentd.org/configuration/buffer-section
 
 For example, with the configuration below,
-data is inserted into tables `
+data is inserted into tables `accesslog_2014_08_02`, `accesslog_2014_08_03` and so on.
 
 ```apache
 <match dummy>
@@ -392,7 +394,7 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 
   project yourproject_id
   dataset yourdataset_id
-  table accesslog_%Y_%
+  table accesslog_%Y_%m_%d
 
   <buffer time>
     timekey 1d
@@ -401,6 +403,8 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 </match>
 ```
 
+**NOTE: In current fluentd (v1.15.x), The maximum unit supported by strftime formatting is the granularity of days**
+
 #### record attribute formatting
 The format can be suffixed with attribute name.
 
data/integration/README.md
ADDED
@@ -0,0 +1,14 @@
+# Requirements
+
+Set Environment Variable
+
+- GOOGLE_APPLICATION_CREDENTIALS (json key path)
+- PROJECT_NAME
+- DATASET_NAME
+- TABLE_NAME
+
+# How to use
+
+1. execute `create_table.sh`
+1. `bundle exec fluentd -c fluent.conf`
+1. `bundle exec dummer -c dummer_insert.rb` or `bundle exec dummer -c dummer_load.rb`
data/integration/dummer_insert.rb
ADDED
@@ -0,0 +1,12 @@
+require "time"
+
+configure "insert" do
+  host "localhost"
+  port 24224
+  rate 100
+  tag type: :string, any: %w(insert_data)
+  field :id, type: :integer, countup: true
+  field :string_field, type: :string, any: %w(str1 str2 str3 str4)
+  field :timestamp_field, type: :string, value: Time.now.iso8601
+  field :date, type: :string, value: Time.now.strftime("%Y-%m-%d")
+end
data/integration/dummer_load.rb
ADDED
@@ -0,0 +1,12 @@
+require "time"
+
+configure "load" do
+  host "localhost"
+  port 24224
+  rate 100
+  tag type: :string, any: %w(load_data)
+  field :id, type: :integer, countup: true
+  field :string_field, type: :string, any: %w(str1 str2 str3 str4)
+  field :timestamp_field, type: :string, value: Time.now.iso8601
+  field :date, type: :string, value: Time.now.strftime("%Y-%m-%d")
+end
data/integration/fluent.conf
ADDED
@@ -0,0 +1,88 @@
+<source>
+  @type forward
+  port 24224
+  bind 0.0.0.0
+</source>
+
+<match insert_data>
+  @id bigquery-insert-integration
+  @type bigquery_insert
+
+  allow_retry_insert_errors true
+
+  auth_method json_key
+  json_key "#{ENV["GOOGLE_APPLICATION_CREDENTIALS"]}"
+
+  <buffer>
+    @type file
+
+    chunk_limit_size 1m
+    chunk_limit_records 1500
+    total_limit_size 1g
+    path ./log/bigquery-insert-integration
+
+    flush_interval 30
+    flush_thread_count 4
+    flush_at_shutdown true
+
+    retry_max_times 14
+    retry_max_interval 30m
+  </buffer>
+
+  request_open_timeout_sec 2m
+
+  slow_flush_log_threshold 30.0
+
+  project "#{ENV["PROJECT_NAME"]}"
+  dataset "#{ENV["DATASET_NAME"]}"
+  table "#{ENV["TABLE_NAME"]}"
+  auto_create_table false
+  fetch_schema true
+  fetch_schema_table "#{ENV["TABLE_NAME"]}"
+
+  insert_id_field id
+
+  <secondary>
+    @type file
+    path ./log/bigquery-insert-integration.errors
+  </secondary>
+</match>
+
+<match load_data>
+  @id bigquery-load-integration
+  @type bigquery_load
+
+  auth_method json_key
+  json_key "#{ENV["GOOGLE_APPLICATION_CREDENTIALS"]}"
+
+  <buffer>
+    @type file
+
+    chunk_limit_size 1m
+    total_limit_size 1g
+    path ./log/bigquery-load-integration
+
+    flush_interval 120
+    flush_thread_count 4
+    flush_at_shutdown true
+
+    retry_max_times 14
+    retry_max_interval 30m
+  </buffer>
+
+  request_open_timeout_sec 2m
+
+  slow_flush_log_threshold 300.0
+
+  project "#{ENV["PROJECT_NAME"]}"
+  dataset "#{ENV["DATASET_NAME"]}"
+  table "#{ENV["TABLE_NAME"]}"
+  auto_create_table false
+  fetch_schema true
+  fetch_schema_table "#{ENV["TABLE_NAME"]}"
+
+  <secondary>
+    @type file
+    path ./log/bigquery-load-integration.errors
+  </secondary>
+</match>
data/integration/schema.json
ADDED
@@ -0,0 +1,22 @@
+[
+  {
+    "name": "id",
+    "type": "INTEGER",
+    "mode": "REQUIRED"
+  },
+  {
+    "name": "string_field",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "timestamp_field",
+    "type": "TIMESTAMP",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "date",
+    "type": "DATE",
+    "mode": "REQUIRED"
+  }
+]
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED
@@ -1,7 +1,7 @@
 module Fluent
   module BigQuery
     class Writer
-      def initialize(log, auth_method, options
+      def initialize(log, auth_method, **options)
        @auth_method = auth_method
        @scope = "https://www.googleapis.com/auth/bigquery"
        @options = options
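This keyword-splat signature is the core of the Ruby 3 compatibility work in 3.0.0: Ruby 3 no longer converts a trailing Hash into keyword arguments implicitly, so the writer has to declare `**options` and callers have to splat explicitly. A minimal illustration of the difference, independent of the gem's code:

```ruby
# Illustration only (not from the gem): Ruby 3 separates positional and keyword
# arguments, which is why Writer#initialize now declares **options explicitly.
def old_style(log, auth_method, options = {})
  options # without keyword parameters, keywords arrive only as a plain Hash
end

def new_style(log, auth_method, **options)
  options # genuine keyword arguments
end

old_style(nil, :json_key, timeout_sec: 60)      # => {:timeout_sec=>60}
new_style(nil, :json_key, timeout_sec: 60)      # => {:timeout_sec=>60}
new_style(nil, :json_key, **{timeout_sec: 60})  # a Hash must be splatted with **
```

The same separation explains the `**{}` splats added to the google-api-client calls further down: a literal `{}` that Ruby 2 quietly treated as empty keywords would become an extra positional argument under Ruby 3, while `**{}` expands to no keywords at all.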
@@ -35,8 +35,9 @@ module Fluent
        }
 
        definition.merge!(time_partitioning: time_partitioning) if time_partitioning
+       definition.merge!(require_partition_filter: require_partition_filter) if require_partition_filter
        definition.merge!(clustering: clustering) if clustering
-       client.insert_table(project, dataset, definition, {})
+       client.insert_table(project, dataset, definition, **{})
        log.debug "create table", project_id: project, dataset: dataset, table: table_id
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        message = e.message
@@ -82,7 +83,7 @@ module Fluent
        if @options[:auto_create_table]
          res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
        else
-         res = client.insert_all_table_data(project, dataset, table_id, body, {})
+         res = client.insert_all_table_data(project, dataset, table_id, body, **{})
        end
        log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
 
@@ -157,10 +158,8 @@ module Fluent
        res = client.insert_job(
          project,
          configuration,
-
-
-          content_type: "application/octet-stream",
-        }
+         upload_source: upload_source,
+         content_type: "application/octet-stream",
        )
        JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -318,6 +317,16 @@ module Fluent
        end
      end
 
+     def require_partition_filter
+       return @require_partition_filter if instance_variable_defined?(:@require_partition_filter)
+
+       if @options[:require_partition_filter]
+         @require_partition_filter = @options[:require_partition_filter]
+       else
+         @require_partition_filter
+       end
+     end
+
      def clustering
        return @clustering if instance_variable_defined?(:@clustering)
 
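For reference, when `require_partition_filter` is enabled the table definition assembled above and handed to `client.insert_table` ends up shaped roughly like the hash below; this mirrors the expectations added to test_out_bigquery_insert.rb later in this diff, with the schema fields abridged for illustration:

```ruby
# Abridged shape of the auto-created table definition when
# require_partition_filter is true (field list shortened for illustration).
definition = {
  table_reference: { table_id: "foo" },
  schema: { fields: [{ name: "time", type: "TIMESTAMP" }] },
  time_partitioning: { type: "DAY", field: "time", expiration_ms: 3_600_000 },
  require_partition_filter: true,
}
```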
@@ -332,7 +341,7 @@ module Fluent
 
      def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
        try_count ||= 1
-       res = client.insert_all_table_data(project, dataset, table_id, body, {})
+       res = client.insert_all_table_data(project, dataset, table_id, body, **{})
      rescue Google::Apis::ClientError => e
        if e.status_code == 404 && /Not Found: Table/i =~ e.message
          if try_count == 1
data/lib/fluent/plugin/out_bigquery_base.rb
CHANGED
@@ -111,9 +111,6 @@ module Fluent
        if @schema
          @table_schema.load_schema(@schema)
        end
-       if @schema_path
-         @table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
-       end
 
        formatter_config = conf.elements("format")[0]
        @formatter = formatter_create(usage: 'out_bigquery_for_insert', default_type: 'json', conf: formatter_config)
@@ -126,6 +123,7 @@ module Fluent
        @tables_mutex = Mutex.new
        @fetched_schemas = {}
        @last_fetch_schema_time = Hash.new(0)
+       @read_schemas = {}
      end
 
      def multi_workers_ready?
@@ -133,7 +131,7 @@ module Fluent
      end
 
      def writer
-       @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method,
+       @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method,
          private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
          email: @email,
          json_key: @json_key,
@@ -148,19 +146,27 @@ module Fluent
          time_partitioning_type: @time_partitioning_type,
          time_partitioning_field: @time_partitioning_field,
          time_partitioning_expiration: @time_partitioning_expiration,
+         require_partition_filter: @require_partition_filter,
          clustering_fields: @clustering_fields,
          timeout_sec: @request_timeout_sec,
          open_timeout_sec: @request_open_timeout_sec,
-
+       )
      end
 
      def format(tag, time, record)
+       if record.nil?
+         log.warn("nil record detected. corrupted chunks? tag=#{tag}, time=#{time}")
+         return
+       end
+
        record = inject_values_to_record(tag, time, record)
 
        meta = metadata(tag, time, record)
        schema =
          if @fetch_schema
            fetch_schema(meta)
+         elsif @schema_path
+           read_schema(meta)
          else
            @table_schema
          end
@@ -209,9 +215,26 @@ module Fluent
        extract_placeholders(@fetch_schema_table || @tablelist[0], metadata)
      end
 
+     def read_schema(metadata)
+       schema_path = read_schema_target_path(metadata)
+
+       unless @read_schemas[schema_path]
+         table_schema = Fluent::BigQuery::RecordSchema.new("record")
+         table_schema.load_schema(MultiJson.load(File.read(schema_path)))
+         @read_schemas[schema_path] = table_schema
+       end
+       @read_schemas[schema_path]
+     end
+
+     def read_schema_target_path(metadata)
+       extract_placeholders(@schema_path, metadata)
+     end
+
      def get_schema(project, dataset, metadata)
        if @fetch_schema
          @fetched_schemas["#{project}.#{dataset}.#{fetch_schema_target_table(metadata)}"] || fetch_schema(metadata)
+       elsif @schema_path
+         @read_schemas[read_schema_target_path(metadata)] || read_schema(metadata)
        else
          @table_schema
        end
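Because `schema_path` now runs through `extract_placeholders`, a single `<match>` section can serve per-tag (or per-time) schema files, and each file is parsed only once per resolved path. A standalone sketch of that caching pattern, using the `json` stdlib instead of MultiJson and assuming an `access.schema` file exists in the working directory:

```ruby
require "json"

# Standalone sketch of the memoization read_schema performs: parse each
# resolved schema file once, then reuse the parsed schema for later chunks.
read_schemas = {}

read_schema = lambda do |schema_path|
  read_schemas[schema_path] ||= JSON.parse(File.read(schema_path))
end

first  = read_schema.call("access.schema")  # parses the file
second = read_schema.call("access.schema")  # served from the cache
puts first.equal?(second)                   # => true (same cached object)
```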
data/lib/fluent/plugin/out_bigquery_insert.rb
CHANGED
@@ -29,6 +29,9 @@ module Fluent
      # If insert_id_field is not specified, true means to allow duplicate rows
      config_param :allow_retry_insert_errors, :bool, default: false
 
+     ## RequirePartitionFilter
+     config_param :require_partition_filter, :bool, default: false
+
      ## Buffer
      config_section :buffer do
        config_set_default :@type, "memory"
@@ -93,6 +96,8 @@ module Fluent
        schema = get_schema(project, dataset, metadata)
 
        insert(project, dataset, table_id, rows, schema, template_suffix)
+     rescue MultiJson::ParseError => e
+       raise Fluent::UnrecoverableError.new(e)
      end
 
      def insert(project, dataset, table_id, rows, schema, template_suffix)
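Wrapping the parse failure in `Fluent::UnrecoverableError` keeps a corrupted buffer chunk from being retried forever; Fluentd backs such a chunk up (or hands it to a `<secondary>` output) instead of looping on it. A standalone sketch of the pattern, with the error class stubbed so it runs outside Fluentd:

```ruby
require "multi_json"

# Standalone sketch of the rescue-and-reraise pattern above; UnrecoverableError
# is stubbed here so the snippet runs without Fluentd installed.
class UnrecoverableError < StandardError; end

def parse_rows(lines)
  lines.map { |line| MultiJson.load(line) }
rescue MultiJson::ParseError => e
  raise UnrecoverableError, e.message # corrupted data: do not retry this chunk
end

p parse_rows(['{"a": 1}'])            # => [{"a"=>1}]
begin
  parse_rows(["not json"])
rescue UnrecoverableError => e
  puts "giving up on chunk: #{e.message}"
end
```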
data/test/plugin/test_out_bigquery_base.rb
CHANGED
@@ -147,33 +147,6 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
-  def test_configure_auth_json_key_as_file_raise_permission_error
-    json_key_path = 'test/plugin/testdata/json_key.json'
-    json_key_path_dir = File.dirname(json_key_path)
-
-    begin
-      File.chmod(0000, json_key_path_dir)
-
-      driver = create_driver(%[
-        table foo
-        auth_method json_key
-        json_key #{json_key_path}
-        project yourproject_id
-        dataset yourdataset_id
-        schema [
-          {"name": "time", "type": "INTEGER"},
-          {"name": "status", "type": "INTEGER"},
-          {"name": "bytes", "type": "INTEGER"}
-        ]
-      ])
-      assert_raises(Errno::EACCES) do
-        driver.instance.writer.client
-      end
-    ensure
-      File.chmod(0755, json_key_path_dir)
-    end
-  end
-
   def test_configure_auth_json_key_as_string
     json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
     json_key_io = StringIO.new(json_key)
@@ -199,6 +172,8 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
   end
 
   def test_configure_auth_application_default
+    omit "This testcase depends on some environment variables." if ENV["CI"] == "true"
+
     driver = create_driver(%[
       table foo
       auth_method application_default
@@ -576,4 +551,24 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert_equal :string, table_schema["argv"].type
     assert_equal :repeated, table_schema["argv"].mode
   end
+
+  def test_resolve_schema_path_with_placeholder
+    now = Time.now.to_i
+    driver = create_driver(<<-CONFIG)
+      table ${tag}_%Y%m%d
+      auth_method json_key
+      json_key jsonkey.josn
+      project yourproject_id
+      dataset yourdataset_id
+      schema_path ${tag}.schema
+
+      <buffer tag, time>
+        timekey 1d
+      </buffer>
+    CONFIG
+
+    metadata = Fluent::Plugin::Buffer::Metadata.new(now, "foo", {})
+
+    assert_equal "foo.schema", driver.instance.read_schema_target_path(metadata)
+  end
 end
data/test/plugin/test_out_bigquery_insert.rb
CHANGED
@@ -5,6 +5,19 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     Fluent::Test.setup
   end
 
+  def is_ruby2?
+    RUBY_VERSION.to_i < 3
+  end
+
+  def build_args(args)
+    if is_ruby2?
+      args << {}
+    end
+    args
+  end
+
+  SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "apache.schema")
+
   CONFIG = %[
     table foo
     email foo@bar.example
@@ -121,11 +134,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     driver = create_driver
 
     stub_writer do |writer|
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
-      }
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
        s = stub!
        s.insert_errors { nil }
        s
@@ -186,11 +200,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     entry = {a: "b"}
     stub_writer do |writer|
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
-      }
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
        ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
        raise ex
      end
@@ -245,11 +260,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     entry = {a: "b"}
     stub_writer do |writer|
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
-      }
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
        ex = Google::Apis::ServerError.new("error", status_code: 501)
        def ex.reason
          "invalid"
@@ -260,14 +276,14 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
       c.append([driver.instance.format(tag, time, record)])
     end
     assert_raise Fluent::BigQuery::UnRetryableError do
       driver.instance.write(chunk)
     end
-    assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.
+    assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.2
     driver.instance_shutdown
   end
 
@@ -290,11 +306,15 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     CONFIG
 
     stub_writer do |writer|
-
-
-
-
-
+      args = ['yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
+        rows: [entry[0]],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }]
+      if RUBY_VERSION.to_i < 3
+        args << {}
+      end
+      mock(writer.client).insert_all_table_data(*args) { stub!.insert_errors { nil } }
     end
 
     driver.run do
@@ -344,25 +364,29 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
      body = {
        rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-
+      args = build_args(['yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
-        fields:
+         fields: schema_fields,
        },
-      }
+      }])
+      mock(writer.client).insert_table(*args)
     end
 
     assert_raise(RuntimeError) do
@@ -416,32 +440,39 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      time_partitioning_type day
      time_partitioning_field time
      time_partitioning_expiration 1h
+
+     require_partition_filter true
    CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
    stub_writer do |writer|
      body = {
        rows: [message],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-
+      args = build_args(['yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
-        fields:
+         fields: schema_fields,
        },
        time_partitioning: {
          type: 'DAY',
          field: 'time',
          expiration_ms: 3600000,
        },
-
+       require_partition_filter: true,
+      }])
+      mock(writer.client).insert_table(*args)
    end
 
    assert_raise(RuntimeError) do
@@ -495,7 +526,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      time_partitioning_type day
      time_partitioning_field time
      time_partitioning_expiration 1h
-     time_partitioning_require_partition_filter true
 
      clustering_fields [
        "time",
@@ -503,23 +533,26 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      ]
    CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
    stub_writer do |writer|
      body = {
        rows: [message],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-
+      args = build_args(['yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
-        fields:
+         fields: schema_fields,
        },
        time_partitioning: {
          type: 'DAY',
@@ -532,7 +565,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
            'vhost',
          ],
        },
-      }
+      }])
+      mock(writer.client).insert_table(*args)
    end
 
    assert_raise(RuntimeError) do
data/test/plugin/test_out_bigquery_load.rb
CHANGED
@@ -64,7 +64,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
            max_bad_records: 0,
          }
        }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
    end
@@ -117,7 +117,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
          },
        },
        job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/}} ,
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
    end
@@ -132,7 +132,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end
@@ -154,11 +154,11 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
            max_bad_records: 0,
          }
        }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
 
-      mock(writer.client).get_job('yourproject_id', 'dummy_job_id',
+      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
        stub! do |s|
          s.id { 'dummy_job_id' }
          s.configuration.stub! do |_s|
@@ -215,7 +215,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end
@@ -237,11 +237,11 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
            max_bad_records: 0,
          }
        }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
 
-      mock(writer.client).get_job('yourproject_id', 'dummy_job_id',
+      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
        stub! do |s|
          s.id { 'dummy_job_id' }
          s.configuration.stub! do |_s|
@@ -317,7 +317,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
          },
        }
      }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
    end
metadata
CHANGED
@@ -1,15 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version:
+  version: 3.0.0
 platform: ruby
 authors:
 - Naoya Ito
 - joker1007
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2022-10-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -139,14 +139,21 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".github/ISSUE_TEMPLATE.md"
+- ".github/workflows/linux.yml"
+- ".github/workflows/windows.yml"
 - ".gitignore"
-- ".travis.yml"
 - Gemfile
 - LICENSE.txt
 - README.md
 - Rakefile
 - fluent-plugin-bigquery.gemspec
 - gemfiles/activesupport-4.gemfile
+- integration/README.md
+- integration/create_table.sh
+- integration/dummer_insert.rb
+- integration/dummer_load.rb
+- integration/fluent.conf
+- integration/schema.json
 - lib/fluent/plugin/bigquery/errors.rb
 - lib/fluent/plugin/bigquery/helper.rb
 - lib/fluent/plugin/bigquery/schema.rb
@@ -168,7 +175,7 @@ homepage: https://github.com/kaizenplatform/fluent-plugin-bigquery
 licenses:
 - Apache-2.0
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -183,8 +190,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.
-signing_key:
+rubygems_version: 3.3.7
+signing_key:
 specification_version: 4
 summary: Fluentd plugin to store data on Google BigQuery
 test_files: