fluent-plugin-bigquery 0.4.1 → 0.4.2
This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fb46d9ded6ca44476f2a241a4a08f5abff3e99f4
+  data.tar.gz: 2729484cdd6de6edbd9636f0c01eeb69c9b0368b
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: aa84153cb3e53c093cc888f93ea211e1f6852f2f6a08ad7eab875438d7e7c0a5be8ab9b1c8b9c181d3655c981b47e756ed9adf06fbe71142f98bb9f128f773e2
+  data.tar.gz: a0fd64ab52abe46eccde000d364ce79dca01a3ae3d9dde48d36963ae4ca03bfe9e17dc913b10d5ea6706765dff2cbc0a8bc34df7b1ba1a3345accd60283478e0
@@ -4,7 +4,7 @@ module Fluent
     class Error < StandardError
       RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
       RETRYABLE_INSERT_ERRORS_REASON = %w(timeout).freeze
-      RETRYABLE_STATUS_CODE = [500, 503]
+      RETRYABLE_STATUS_CODE = [500, 502, 503, 504]

      class << self
        def wrap(google_api_error, message = nil, force_unretryable: false)
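The substantive change above: HTTP 502 (Bad Gateway) and 504 (Gateway Timeout) now count as retryable server errors alongside 500 and 503. A minimal sketch of the effect, assuming classification is a simple membership test (the helper name below is illustrative, not the plugin's actual API):

    RETRYABLE_STATUS_CODE = [500, 502, 503, 504].freeze

    # Hypothetical helper: transient gateway/server failures are retried.
    def retryable_status?(status_code)
      RETRYABLE_STATUS_CODE.include?(status_code)
    end

    retryable_status?(502) # => true (0.4.1 treated 502 as unretryable)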
@@ -1,10 +1,10 @@
 module Fluent
   module BigQuery
     class Writer
-      def initialize(log, auth_method,
+      def initialize(log, auth_method, options = {})
         @auth_method = auth_method
         @scope = "https://www.googleapis.com/auth/bigquery"
-        @
+        @options = options
         @log = log
         @num_errors_per_chunk = {}

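Writer's long parameter list collapses into a single options hash kept in @options; the methods below read their settings from it. A sketch of the new construction, with placeholder values (the option keys come from the out_bigquery.rb hunk further down):

    # Placeholder auth method and values, for illustration only.
    writer = Fluent::BigQuery::Writer.new(log, 'private_key', {
      private_key_path: "/path/to/key.p12",
      email: "foo@bar.example",
      skip_invalid_rows: false,
      ignore_unknown_values: false,
      timeout_sec: nil,
      open_timeout_sec: 60,
    })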
@@ -22,7 +22,7 @@ module Fluent
         @client = client
       end

-      def create_table(project, dataset, table_id, record_schema
+      def create_table(project, dataset, table_id, record_schema)
         create_table_retry_limit = 3
         create_table_retry_wait = 1
         create_table_retry_count = 0
@@ -38,10 +38,10 @@ module Fluent
           }
         }

-        if time_partitioning_type
+        if @options[:time_partitioning_type]
           definition[:time_partitioning] = {
-            type: time_partitioning_type.to_s.upcase,
-            expiration_ms: time_partitioning_expiration ? time_partitioning_expiration * 1000 : nil
+            type: @options[:time_partitioning_type].to_s.upcase,
+            expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil
           }.compact
         end
         client.insert_table(project, dataset, definition, {})
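create_table reads the partitioning settings from @options as well. Worth noting: time_partitioning_expiration is configured in seconds while BigQuery expects milliseconds, and .compact drops the key when no expiration is set. For example, with a type of :day and an expiration of 3600 seconds:

    # @options = { time_partitioning_type: :day, time_partitioning_expiration: 3600 }
    definition[:time_partitioning] = {
      type: :day.to_s.upcase,      # => "DAY"
      expiration_ms: 3600 * 1000,  # => 3_600_000
    }.compact
    # With no expiration configured, expiration_ms is nil and .compact removes it.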
@@ -84,21 +84,21 @@ module Fluent
         nil
       end

-      def insert_rows(project, dataset, table_id, rows,
+      def insert_rows(project, dataset, table_id, rows, template_suffix: nil)
         body = {
           rows: rows,
-          skip_invalid_rows: skip_invalid_rows,
-          ignore_unknown_values: ignore_unknown_values,
+          skip_invalid_rows: @options[:skip_invalid_rows],
+          ignore_unknown_values: @options[:ignore_unknown_values],
         }
         body.merge!(template_suffix: template_suffix) if template_suffix
         res = client.insert_all_table_data(project, dataset, table_id, body, {
-          options: {timeout_sec: timeout_sec, open_timeout_sec: open_timeout_sec}
+          options: {timeout_sec: @options[:timeout_sec], open_timeout_sec: @options[:open_timeout_sec]}
         })
         log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size

         if res.insert_errors && !res.insert_errors.empty?
           log.warn "insert errors", project_id: project, dataset: dataset, table: table_id, insert_errors: res.insert_errors.to_s
-          if allow_retry_insert_errors
+          if @options[:allow_retry_insert_errors]
             is_included_any_retryable_insert_error = res.insert_errors.any? do |insert_error|
               insert_error.errors.any? { |error| Fluent::BigQuery::Error.retryable_insert_errors_reason?(error.reason) }
             end
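insert_rows likewise drops its per-call settings in favor of @options; only template_suffix survives as a keyword argument, since it varies per call and is merged into the request body only when present. A sketch of the two call shapes (table name and suffix value are hypothetical):

    # Plain insert; skip_invalid_rows and ignore_unknown_values come from @options.
    writer.insert_rows(project, dataset, "access_log", rows)

    # Insert into a template table; "_20170327" is an illustrative suffix.
    writer.insert_rows(project, dataset, "access_log", rows, template_suffix: "_20170327")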
@@ -118,7 +118,7 @@ module Fluent
         raise Fluent::BigQuery::Error.wrap(e)
       end

-      def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields
+      def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields)
         configuration = {
           configuration: {
             load: {
@@ -132,14 +132,14 @@ module Fluent
               },
               write_disposition: "WRITE_APPEND",
               source_format: "NEWLINE_DELIMITED_JSON",
-              ignore_unknown_values: ignore_unknown_values,
-              max_bad_records: max_bad_records,
+              ignore_unknown_values: @options[:ignore_unknown_values],
+              max_bad_records: @options[:max_bad_records],
             }
           }
         }

-        job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a
-        configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if time_partitioning_type
+        job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
+        configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
         configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id

         # If target table is already exist, omit schema configuration.
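The load configuration pulls ignore_unknown_values and max_bad_records from @options, and the deterministic job ID is generated only when prevent_duplicate_load is enabled; without a job_id, no job_reference is attached and BigQuery assigns its own. A sketch of the gating and its payoff:

    # Resubmitting the same chunk with the same settings yields the same
    # job_id, so BigQuery rejects the duplicate load with HTTP 409 and the
    # plugin waits on the original job instead (see the 409 branch in a
    # later hunk).
    job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
    configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id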
@@ -159,8 +159,8 @@ module Fluent
           upload_source: upload_source,
           content_type: "application/octet-stream",
           options: {
-            timeout_sec: timeout_sec,
-            open_timeout_sec: open_timeout_sec,
+            timeout_sec: @options[:timeout_sec],
+            open_timeout_sec: @options[:open_timeout_sec],
           }
         }
       )
@@ -172,14 +172,19 @@ module Fluent
         reason = e.respond_to?(:reason) ? e.reason : nil
         log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason

-        if auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
+        if @options[:auto_create_table] && e.status_code == 404 && /Not Found: Table/i =~ e.message
           # Table Not Found: Auto Create Table
-          create_table(
+          create_table(
+            project,
+            dataset,
+            table_id,
+            fields,
+          )
           raise "table created. send rows next time."
         end

         if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
-          wait_load_job(chunk_id, project, dataset, job_id, table_id)
+          wait_load_job(chunk_id, project, dataset, job_id, table_id)
           @num_errors_per_chunk.delete(chunk_id)
           return
         end
@@ -242,9 +247,9 @@ module Fluent

       def get_auth_from_private_key
         require 'google/api_client/auth/key_utils'
-        private_key_path = @
-        private_key_passphrase = @
-        email = @
+        private_key_path = @options[:private_key_path]
+        private_key_passphrase = @options[:private_key_passphrase]
+        email = @options[:email]

         key = Google::APIClient::KeyUtils.load_from_pkcs12(private_key_path, private_key_passphrase)
         Signet::OAuth2::Client.new(
@@ -261,7 +266,7 @@ module Fluent
       end

       def get_auth_from_json_key
-        json_key = @
+        json_key = @options[:json_key]

         begin
           JSON.parse(json_key)
@@ -283,8 +288,8 @@ module Fluent
         table_id.gsub(/\$\d+$/, "")
       end

-      def create_job_id(chunk_id, dataset, table, schema
-        job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}#{@num_errors_per_chunk[chunk_id]}"
+      def create_job_id(chunk_id, dataset, table, schema)
+        job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{@options[:max_bad_records]}#{@options[:ignore_unknown_values]}#{@num_errors_per_chunk[chunk_id]}"
         @log.debug "job_id_key: #{job_id_key}"
         "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
       end
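create_job_id loses its trailing parameters; max_bad_records and ignore_unknown_values now come from @options but still feed the hash, so the ID stays deterministic for a given chunk, destination, schema, and settings. A standalone sketch of the scheme with made-up inputs:

    require 'digest/sha1'

    # @num_errors_per_chunk[chunk_id] is part of the key, so a chunk that has
    # already failed gets a fresh job ID on its next attempt.
    job_id_key = "chunk1" + "yourdataset_id" + "foo" + "[]" + "0" + "false" + "0"
    job_id = "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
    # => a stable "fluentd_job_<40 hex chars>" for identical inputs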
@@ -288,6 +288,16 @@ module Fluent
         private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
         email: @email,
         json_key: @json_key,
+        skip_invalid_rows: @skip_invalid_rows,
+        ignore_unknown_values: @ignore_unknown_values,
+        max_bad_records: @max_bad_records,
+        allow_retry_insert_errors: @allow_retry_insert_errors,
+        prevent_duplicate_load: @prevent_duplicate_load,
+        auto_create_table: @auto_create_table,
+        time_partitioning_type: @time_partitioning_type,
+        time_partitioning_expiration: @time_partitioning_expiration,
+        timeout_sec: @request_timeout_sec,
+        open_timeout_sec: @request_open_timeout_sec,
       })
     end

@@ -427,11 +437,11 @@ module Fluent
     end

     def insert(table_id, rows, template_suffix)
-      writer.insert_rows(@project, @dataset, table_id, rows,
+      writer.insert_rows(@project, @dataset, table_id, rows, template_suffix: template_suffix)
     rescue Fluent::BigQuery::Error => e
       if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
         # Table Not Found: Auto Create Table
-        writer.create_table(@project, @dataset, table_id, @fields
+        writer.create_table(@project, @dataset, table_id, @fields)
         raise "table created. send rows next time."
       end

@@ -473,12 +483,7 @@ module Fluent
       res = nil

       create_upload_source(chunk) do |upload_source|
-        res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields
-          prevent_duplicate_load: @prevent_duplicate_load,
-          ignore_unknown_values: @ignore_unknown_values, max_bad_records: @max_bad_records,
-          timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec, auto_create_table: @auto_create_table,
-          time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration
-        })
+        res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields)
       end
     rescue Fluent::BigQuery::Error => e
       if e.retryable?
@@ -754,10 +754,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
     driver = create_driver

     writer = stub_writer(driver)
-    mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', entry,
-      skip_invalid_rows: false,
-      ignore_unknown_values: false
-    ))
+    mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', entry, template_suffix: nil)
     mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
       rows: entry,
       skip_invalid_rows: false,
@@ -780,62 +777,71 @@ class BigQueryOutputTest < Test::Unit::TestCase

   def test_write_with_retryable_error
     entry = {json: {a: "b"}}, {json: {b: "c"}}
-
-
-
-
-
-
-
-      time_format %s
-      time_field  time
-
-      schema [
-        {"name": "time", "type": "INTEGER"},
-        {"name": "status", "type": "INTEGER"},
-        {"name": "bytes", "type": "INTEGER"},
-        {"name": "vhost", "type": "STRING"},
-        {"name": "path", "type": "STRING"},
-        {"name": "method", "type": "STRING"},
-        {"name": "protocol", "type": "STRING"},
-        {"name": "agent", "type": "STRING"},
-        {"name": "referer", "type": "STRING"},
-        {"name": "remote", "type": "RECORD", "fields": [
-          {"name": "host", "type": "STRING"},
-          {"name": "ip", "type": "STRING"},
-          {"name": "user", "type": "STRING"}
-        ]},
-        {"name": "requesttime", "type": "FLOAT"},
-        {"name": "bot_access", "type": "BOOLEAN"},
-        {"name": "loginsession", "type": "BOOLEAN"}
-      ]
-      <secondary>
-        type file
-        path error
-        utc
-      </secondary>
-    CONFIG
+    data_input = [
+      { "status_code" => 500 },
+      { "status_code" => 502 },
+      { "status_code" => 503 },
+      { "status_code" => 504 },
+    ]

-
-
-
-
-
-
-
-        raise ex
-      end
+    data_input.each do |d|
+      driver = create_driver(<<-CONFIG)
+        table foo
+        email foo@bar.example
+        private_key_path /path/to/key
+        project yourproject_id
+        dataset yourdataset_id

-
-
-        chunk << e.to_msgpack
-      end
+        time_format %s
+        time_field  time

-
-
-
-
-
+        schema [
+          {"name": "time", "type": "INTEGER"},
+          {"name": "status", "type": "INTEGER"},
+          {"name": "bytes", "type": "INTEGER"},
+          {"name": "vhost", "type": "STRING"},
+          {"name": "path", "type": "STRING"},
+          {"name": "method", "type": "STRING"},
+          {"name": "protocol", "type": "STRING"},
+          {"name": "agent", "type": "STRING"},
+          {"name": "referer", "type": "STRING"},
+          {"name": "remote", "type": "RECORD", "fields": [
+            {"name": "host", "type": "STRING"},
+            {"name": "ip", "type": "STRING"},
+            {"name": "user", "type": "STRING"}
+          ]},
+          {"name": "requesttime", "type": "FLOAT"},
+          {"name": "bot_access", "type": "BOOLEAN"},
+          {"name": "loginsession", "type": "BOOLEAN"}
+        ]
+        <secondary>
+          type file
+          path error
+          utc
+        </secondary>
+      CONFIG
+
+      writer = stub_writer(driver)
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+        rows: entry,
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
+        ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
+        raise ex
+      end
+
+      chunk = Fluent::MemoryBufferChunk.new("my.tag")
+      entry.each do |e|
+        chunk << e.to_msgpack
+      end
+
+      driver.instance.start
+      assert_raise Fluent::BigQuery::RetryableError do
+        driver.instance.write(chunk)
+      end
+      driver.instance.shutdown
+    end
   end

   def test_write_with_not_retryable_error
@@ -1455,11 +1461,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
     schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
     CONFIG
     writer = stub_writer(driver)
-    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message],
-
-      ignore_unknown_values: false,
-    )) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
-    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: nil, time_partitioning_expiration: nil)
+    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
+    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields))

     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     chunk << message.to_msgpack
@@ -1517,11 +1520,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
     time_partitioning_expiration 1h
     CONFIG
     writer = stub_writer(driver)
-    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message],
-
-      ignore_unknown_values: false,
-    )) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
-    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: :day, time_partitioning_expiration: 3600)
+    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
+    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields))

     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     chunk << message.to_msgpack
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.4.1
+  version: 0.4.2
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-03-
+date: 2017-03-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake