fluent-plugin-bigquery 2.2.0 → 2.3.0
This diff compares the contents of two publicly released versions of the package, as published to their respective registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +31 -0
- data/.github/workflows/windows.yml +27 -0
- data/README.md +10 -1
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +11 -0
- data/lib/fluent/plugin/out_bigquery_base.rb +21 -3
- data/lib/fluent/plugin/out_bigquery_insert.rb +3 -0
- data/test/plugin/test_out_bigquery_base.rb +22 -27
- data/test/plugin/test_out_bigquery_insert.rb +15 -5
- data/test/plugin/test_out_bigquery_load.rb +4 -4
- metadata +5 -4
- data/.travis.yml +0 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4209a2b6eaaf0b6f8ba315b6f5de6690e28fb47890aeea777bdb31889e4785ab
+  data.tar.gz: b0983fb4fa16d72059b0e679ea4ee627d19e805779fa010888fa1723354896a5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a6fc6891eda12bbc1272af7af9c4e8d48e588bc7ef65153b3a7524e39468baebb8fdb925856d1850bbda12fed5d33865faa56542503f76fdf724a18937c7d56e
+  data.tar.gz: fff0599b6a838cb4ff233ba9585b558ff733eed8063c1cf36ee08aaacb9b3c2ca1bce4d13db2a51ecc72c398ba751a18b2856a6348f43738ee8ca366becdea61
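
For readers who want to verify downloaded artifacts against these digests: a minimal sketch using Ruby's standard library, assuming metadata.gz and data.tar.gz have already been extracted from the .gem archive (a gem is a tar containing both).

    # Recompute the SHA256 digests recorded in checksums.yaml and print
    # them for comparison with the values above.
    require "digest"

    %w[metadata.gz data.tar.gz].each do |name|
      puts "#{name}: #{Digest::SHA256.file(name).hexdigest}"
    end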
data/.github/workflows/linux.yml
ADDED
@@ -0,0 +1,31 @@
+name: Testing on Ubuntu
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby:
+          - 2.6
+          - 2.7
+          - 3.0
+          - 3.1
+        os:
+          - ubuntu-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/.github/workflows/windows.yml
ADDED
@@ -0,0 +1,27 @@
+name: Testing on Windows
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby: [ '2.6', '2.7', '3.0', '3.1' ]
+        os:
+          - windows-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/README.md
CHANGED
@@ -1,5 +1,13 @@
 # fluent-plugin-bigquery
 
+## Notice
+
+We will transfer fluent-plugin-bigquery repository to [fluent-plugins-nursery](https://github.com/fluent-plugins-nursery) organization.
+It does not change maintenance plan.
+The main purpose is that it solves mismatch between maintainers and current organization.
+
+---
+
 [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.
 
 - **Plugin type**: Output
@@ -52,7 +60,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | auto_create_table | bool | no | no | false | If true, creates table automatically |
 | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
 | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
-| schema_path | string | yes (either `fetch_schema`) |
+| schema_path | string | yes (either `fetch_schema`) | yes | nil | Schema Definition file path. It is formatted by JSON. |
 | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
 | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
 | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
@@ -72,6 +80,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
 | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
 | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+| require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. |
 
 #### bigquery_load
 
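The two documented additions above are wired up in the source and test changes that follow. A minimal, illustrative config sketch combining them, written in the heredoc style the tests use (the project, dataset, key path, and schema file names are placeholders, not taken from this diff):

    # Illustrative only: ${tag} in schema_path is resolved per chunk, so tag
    # "foo" loads "foo.schema"; require_partition_filter takes effect when
    # the plugin auto-creates the table.
    config = <<-CONFIG
      table foo_%Y%m%d
      auth_method json_key
      json_key /path/to/key.json
      project yourproject_id
      dataset yourdataset_id
      auto_create_table true
      schema_path ${tag}.schema
      require_partition_filter true

      <buffer tag, time>
        timekey 1d
      </buffer>
    CONFIG
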
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED
@@ -35,6 +35,7 @@ module Fluent
         }
 
         definition.merge!(time_partitioning: time_partitioning) if time_partitioning
+        definition.merge!(require_partition_filter: require_partition_filter) if require_partition_filter
         definition.merge!(clustering: clustering) if clustering
         client.insert_table(project, dataset, definition, {})
         log.debug "create table", project_id: project, dataset: dataset, table: table_id
@@ -318,6 +319,16 @@ module Fluent
         end
       end
 
+      def require_partition_filter
+        return @require_partition_filter if instance_variable_defined?(:@require_partition_filter)
+
+        if @options[:require_partition_filter]
+          @require_partition_filter = @options[:require_partition_filter]
+        else
+          @require_partition_filter
+        end
+      end
+
       def clustering
         return @clustering if instance_variable_defined?(:@clustering)
 
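The new accessor reuses the guard idiom of the clustering method below it: checking instance_variable_defined? instead of `@x ||= ...` is the usual way to memoize a result that may itself be falsy. A distilled standalone sketch of the idiom (not plugin code):

    # `@flag ||= compute` would re-run compute whenever the cached value is
    # false or nil; the defined? guard consults @options at most once.
    class Options
      def initialize(options)
        @options = options
      end

      def flag
        return @flag if instance_variable_defined?(:@flag)

        @flag = @options[:flag]
      end
    end

    opts = Options.new(flag: false)
    opts.flag # => false, computed and cached
    opts.flag # => false, served from @flag without re-reading @options
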
@@ -111,9 +111,6 @@ module Fluent
|
|
111
111
|
if @schema
|
112
112
|
@table_schema.load_schema(@schema)
|
113
113
|
end
|
114
|
-
if @schema_path
|
115
|
-
@table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
|
116
|
-
end
|
117
114
|
|
118
115
|
formatter_config = conf.elements("format")[0]
|
119
116
|
@formatter = formatter_create(usage: 'out_bigquery_for_insert', default_type: 'json', conf: formatter_config)
|
@@ -126,6 +123,7 @@ module Fluent
         @tables_mutex = Mutex.new
         @fetched_schemas = {}
         @last_fetch_schema_time = Hash.new(0)
+        @read_schemas = {}
       end
 
       def multi_workers_ready?
@@ -148,6 +146,7 @@ module Fluent
           time_partitioning_type: @time_partitioning_type,
           time_partitioning_field: @time_partitioning_field,
           time_partitioning_expiration: @time_partitioning_expiration,
+          require_partition_filter: @require_partition_filter,
           clustering_fields: @clustering_fields,
           timeout_sec: @request_timeout_sec,
           open_timeout_sec: @request_open_timeout_sec,
@@ -161,6 +160,8 @@ module Fluent
         schema =
           if @fetch_schema
             fetch_schema(meta)
+          elsif @schema_path
+            read_schema(meta)
           else
             @table_schema
           end
@@ -209,9 +210,26 @@ module Fluent
         extract_placeholders(@fetch_schema_table || @tablelist[0], metadata)
       end
 
+      def read_schema(metadata)
+        schema_path = read_schema_target_path(metadata)
+
+        unless @read_schemas[schema_path]
+          table_schema = Fluent::BigQuery::RecordSchema.new("record")
+          table_schema.load_schema(MultiJson.load(File.read(schema_path)))
+          @read_schemas[schema_path] = table_schema
+        end
+        @read_schemas[schema_path]
+      end
+
+      def read_schema_target_path(metadata)
+        extract_placeholders(@schema_path, metadata)
+      end
+
       def get_schema(project, dataset, metadata)
         if @fetch_schema
           @fetched_schemas["#{project}.#{dataset}.#{fetch_schema_target_table(metadata)}"] || fetch_schema(metadata)
+        elsif @schema_path
+          @read_schemas[read_schema_target_path(metadata)] || read_schema(metadata)
         else
           @table_schema
         end
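Net effect of these hunks: the schema file is no longer read once at configure time; instead the configured path is resolved per chunk via extract_placeholders, and each distinct resolved file is parsed a single time into @read_schemas. A rough standalone sketch of that cache behavior (assuming foo.schema and bar.schema exist on disk):

    # Stand-in for the plugin's per-path schema cache; gsub plays the role
    # of extract_placeholders here.
    require "multi_json"

    schema_path_template = "${tag}.schema"
    read_schemas = {} # resolved path => parsed schema

    %w[foo bar foo].each do |tag|
      path = schema_path_template.gsub("${tag}", tag)
      read_schemas[path] ||= MultiJson.load(File.read(path))
    end
    # "foo.schema" was parsed once even though tag "foo" appeared twice.
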
data/lib/fluent/plugin/out_bigquery_insert.rb
CHANGED
@@ -29,6 +29,9 @@ module Fluent
       # If insert_id_field is not specified, true means to allow duplicate rows
       config_param :allow_retry_insert_errors, :bool, default: false
 
+      ## RequirePartitionFilter
+      config_param :require_partition_filter, :bool, default: false
+
       ## Buffer
       config_section :buffer do
         config_set_default :@type, "memory"
data/test/plugin/test_out_bigquery_base.rb
CHANGED
@@ -147,33 +147,6 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
-  def test_configure_auth_json_key_as_file_raise_permission_error
-    json_key_path = 'test/plugin/testdata/json_key.json'
-    json_key_path_dir = File.dirname(json_key_path)
-
-    begin
-      File.chmod(0000, json_key_path_dir)
-
-      driver = create_driver(%[
-        table foo
-        auth_method json_key
-        json_key #{json_key_path}
-        project yourproject_id
-        dataset yourdataset_id
-        schema [
-          {"name": "time", "type": "INTEGER"},
-          {"name": "status", "type": "INTEGER"},
-          {"name": "bytes", "type": "INTEGER"}
-        ]
-      ])
-      assert_raises(Errno::EACCES) do
-        driver.instance.writer.client
-      end
-    ensure
-      File.chmod(0755, json_key_path_dir)
-    end
-  end
-
   def test_configure_auth_json_key_as_string
     json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
     json_key_io = StringIO.new(json_key)
@@ -199,6 +172,8 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
   end
 
   def test_configure_auth_application_default
+    omit "This testcase depends on some environment variables." if ENV["CI"] == "true"
+
     driver = create_driver(%[
       table foo
       auth_method application_default
@@ -576,4 +551,24 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert_equal :string, table_schema["argv"].type
     assert_equal :repeated, table_schema["argv"].mode
   end
+
+  def test_resolve_schema_path_with_placeholder
+    now = Time.now.to_i
+    driver = create_driver(<<-CONFIG)
+      table ${tag}_%Y%m%d
+      auth_method json_key
+      json_key jsonkey.josn
+      project yourproject_id
+      dataset yourdataset_id
+      schema_path ${tag}.schema
+
+      <buffer tag, time>
+        timekey 1d
+      </buffer>
+    CONFIG
+
+    metadata = Fluent::Plugin::Buffer::Metadata.new(now, "foo", {})
+
+    assert_equal "foo.schema", driver.instance.read_schema_target_path(metadata)
+  end
 end
data/test/plugin/test_out_bigquery_insert.rb
CHANGED
@@ -5,6 +5,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     Fluent::Test.setup
   end
 
+  SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "apache.schema")
+
   CONFIG = %[
     table foo
     email foo@bar.example
@@ -260,7 +262,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
       c.append([driver.instance.format(tag, time, record)])
     end
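The recurring `metadata = ...` fix in these tests constructs the buffer metadata explicitly. To the best of my knowledge, Fluentd v1 defines Fluent::Plugin::Buffer::Metadata as a plain three-field struct, and the chunk tests only need it as a unique buffer key, so the exact field values do not matter; a hedged sketch:

    # Assumes Buffer::Metadata is Struct.new(:timekey, :tag, :variables),
    # as in Fluentd v1; the tests care only that the object is a valid key.
    require "fluent/plugin/buffer"

    metadata = Fluent::Plugin::Buffer::Metadata.new(Time.now.to_i, "tag", {})
    metadata.tag # => "tag"
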
@@ -344,6 +346,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
       body = {
         rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
@@ -360,7 +364,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
           table_id: 'foo',
         },
         schema: {
-          fields:
+          fields: schema_fields,
         },
       }, {})
     end
@@ -416,8 +420,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       time_partitioning_type day
       time_partitioning_field time
       time_partitioning_expiration 1h
+
+      require_partition_filter true
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
       body = {
         rows: [message],
@@ -434,13 +442,14 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
           table_id: 'foo',
         },
         schema: {
-          fields:
+          fields: schema_fields,
         },
         time_partitioning: {
           type: 'DAY',
           field: 'time',
           expiration_ms: 3600000,
         },
+        require_partition_filter: true,
       }, {})
     end
 
@@ -495,7 +504,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       time_partitioning_type day
       time_partitioning_field time
       time_partitioning_expiration 1h
-      time_partitioning_require_partition_filter true
 
       clustering_fields [
         "time",
@@ -503,6 +511,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       ]
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
       body = {
         rows: [message],
@@ -519,7 +529,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
           table_id: 'foo',
         },
         schema: {
-          fields:
+          fields: schema_fields,
         },
         time_partitioning: {
           type: 'DAY',
data/test/plugin/test_out_bigquery_load.rb
CHANGED
@@ -132,7 +132,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
       c.append([driver.instance.format(tag, time, record)])
     end
@@ -158,7 +158,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
       stub!.job_reference.stub!.job_id { "dummy_job_id" }
     end
 
-    mock(writer.client).get_job('yourproject_id', 'dummy_job_id',
+    mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
       stub! do |s|
         s.id { 'dummy_job_id' }
         s.configuration.stub! do |_s|
@@ -215,7 +215,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
       c.append([driver.instance.format(tag, time, record)])
     end
@@ -241,7 +241,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
       stub!.job_reference.stub!.job_id { "dummy_job_id" }
     end
 
-    mock(writer.client).get_job('yourproject_id', 'dummy_job_id',
+    mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
       stub! do |s|
         s.id { 'dummy_job_id' }
         s.configuration.stub! do |_s|
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 2.
+  version: 2.3.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2022-02-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -139,8 +139,9 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".github/ISSUE_TEMPLATE.md"
+- ".github/workflows/linux.yml"
+- ".github/workflows/windows.yml"
 - ".gitignore"
-- ".travis.yml"
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -183,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to store data on Google BigQuery