RubyGems - fluent-plugin-bigquery - Versions diffs - 0.4.1 → 0.4.2 - Mend

fluent-plugin-bigquery 0.4.1 → 0.4.2

Files changed (7)

checksums.yaml +4 -4
data/lib/fluent/plugin/bigquery/errors.rb +1 -1
data/lib/fluent/plugin/bigquery/version.rb +1 -1
data/lib/fluent/plugin/bigquery/writer.rb +32 -27
data/lib/fluent/plugin/out_bigquery.rb +13 -8
data/test/plugin/test_out_bigquery.rb +67 -67
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 75db2952171316995000122029bb4e4f3eeb0a45
-  data.tar.gz: 43d6cff7aaf69f06288f006d4f2bd97300b59dd9
+  metadata.gz: fb46d9ded6ca44476f2a241a4a08f5abff3e99f4
+  data.tar.gz: 2729484cdd6de6edbd9636f0c01eeb69c9b0368b
 SHA512:
-  metadata.gz: 0157b43c59d7ac17e50051a261cf21f036bc1c5827a6a102d4747d7f66293fb8e1f733bebcabcd652afdf2b90a9abccbf622763d3e7cc2d9b34a382d75c4adc3
-  data.tar.gz: 169d3bf4a140f4dc3e5fd77707f2ed9d45ed989b23c217555ea22ef8275a1fed36129169e7b523259d767f745cc892d31605393bc40af244448b7ca81f6d62d8
+  metadata.gz: aa84153cb3e53c093cc888f93ea211e1f6852f2f6a08ad7eab875438d7e7c0a5be8ab9b1c8b9c181d3655c981b47e756ed9adf06fbe71142f98bb9f128f773e2
+  data.tar.gz: a0fd64ab52abe46eccde000d364ce79dca01a3ae3d9dde48d36963ae4ca03bfe9e17dc913b10d5ea6706765dff2cbc0a8bc34df7b1ba1a3345accd60283478e0

data/lib/fluent/plugin/bigquery/errors.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module Fluent
     class Error < StandardError
       RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
       RETRYABLE_INSERT_ERRORS_REASON = %w(timeout).freeze
-      RETRYABLE_STATUS_CODE = [500, 503]
+      RETRYABLE_STATUS_CODE = [500, 502, 503, 504]
       class << self
         def wrap(google_api_error, message = nil, force_unretryable: false)

data/lib/fluent/plugin/bigquery/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Fluent
   module BigQueryPlugin
-    VERSION = "0.4.1".freeze
+    VERSION = "0.4.2".freeze
   end
 end

data/lib/fluent/plugin/bigquery/writer.rb CHANGED Viewed

@@ -1,10 +1,10 @@
 module Fluent
   module BigQuery
     class Writer
-      def initialize(log, auth_method, auth_options = {})
+      def initialize(log, auth_method, options = {})
         @auth_method = auth_method
         @scope = "https://www.googleapis.com/auth/bigquery"
-        @auth_options = auth_options
+        @options = options
         @log = log
         @num_errors_per_chunk = {}
@@ -22,7 +22,7 @@ module Fluent
         @client = client
       end
-      def create_table(project, dataset, table_id, record_schema, time_partitioning_type: nil, time_partitioning_expiration: nil)
+      def create_table(project, dataset, table_id, record_schema)
         create_table_retry_limit = 3
         create_table_retry_wait = 1
         create_table_retry_count = 0
@@ -38,10 +38,10 @@ module Fluent
             }
           }
-          if time_partitioning_type
+          if @options[:time_partitioning_type]
             definition[:time_partitioning] = {
-              type: time_partitioning_type.to_s.upcase,
-              expiration_ms: time_partitioning_expiration ? time_partitioning_expiration * 1000 : nil
+              type: @options[:time_partitioning_type].to_s.upcase,
+              expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil
             }.compact
           end
           client.insert_table(project, dataset, definition, {})
@@ -84,21 +84,21 @@ module Fluent
         nil
       end
-      def insert_rows(project, dataset, table_id, rows, skip_invalid_rows: false, ignore_unknown_values: false, template_suffix: nil, timeout_sec: nil, open_timeout_sec: 60, allow_retry_insert_errors: false)
+      def insert_rows(project, dataset, table_id, rows, template_suffix: nil)
         body = {
           rows: rows,
-          skip_invalid_rows: skip_invalid_rows,
-          ignore_unknown_values: ignore_unknown_values,
+          skip_invalid_rows: @options[:skip_invalid_rows],
+          ignore_unknown_values: @options[:ignore_unknown_values],
         }
         body.merge!(template_suffix: template_suffix) if template_suffix
         res = client.insert_all_table_data(project, dataset, table_id, body, {
-          options: {timeout_sec: timeout_sec, open_timeout_sec: open_timeout_sec}
+          options: {timeout_sec: @options[:timeout_sec], open_timeout_sec: @options[:open_timeout_sec]}
         })
         log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
         if res.insert_errors && !res.insert_errors.empty?
           log.warn "insert errors", project_id: project, dataset: dataset, table: table_id, insert_errors: res.insert_errors.to_s
-          if allow_retry_insert_errors
+          if @options[:allow_retry_insert_errors]
             is_included_any_retryable_insert_error = res.insert_errors.any? do |insert_error|
               insert_error.errors.any? { |error| Fluent::BigQuery::Error.retryable_insert_errors_reason?(error.reason) }
             end
@@ -118,7 +118,7 @@ module Fluent
         raise Fluent::BigQuery::Error.wrap(e)
       end
-      def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields, prevent_duplicate_load: false, ignore_unknown_values: false, max_bad_records: 0, timeout_sec: nil, open_timeout_sec: 60, auto_create_table: nil, time_partitioning_type: nil, time_partitioning_expiration: nil)
+      def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields)
         configuration = {
           configuration: {
             load: {
@@ -132,14 +132,14 @@ module Fluent
               },
               write_disposition: "WRITE_APPEND",
               source_format: "NEWLINE_DELIMITED_JSON",
-              ignore_unknown_values: ignore_unknown_values,
-              max_bad_records: max_bad_records,
+              ignore_unknown_values: @options[:ignore_unknown_values],
+              max_bad_records: @options[:max_bad_records],
             }
           }
         }
-        job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a, max_bad_records, ignore_unknown_values) if prevent_duplicate_load
-        configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if time_partitioning_type
+        job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
+        configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
         configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
         # If target table is already exist, omit schema configuration.
@@ -159,8 +159,8 @@ module Fluent
             upload_source: upload_source,
             content_type: "application/octet-stream",
             options: {
-              timeout_sec: timeout_sec,
-              open_timeout_sec: open_timeout_sec,
+              timeout_sec: @options[:timeout_sec],
+              open_timeout_sec: @options[:open_timeout_sec],
             }
           }
         )
@@ -172,14 +172,19 @@ module Fluent
         reason = e.respond_to?(:reason) ? e.reason : nil
         log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
-        if auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
+        if @options[:auto_create_table] && e.status_code == 404 && /Not Found: Table/i =~ e.message
           # Table Not Found: Auto Create Table
-          create_table(project, dataset, table_id, fields, time_partitioning_type: time_partitioning_type, time_partitioning_expiration: time_partitioning_expiration)
+          create_table(
+            project,
+            dataset,
+            table_id,
+            fields,
+          )
           raise "table created. send rows next time."
         end
         if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
-          wait_load_job(chunk_id, project, dataset, job_id, table_id)
+          wait_load_job(chunk_id, project, dataset, job_id, table_id)
           @num_errors_per_chunk.delete(chunk_id)
           return
         end
@@ -242,9 +247,9 @@ module Fluent
       def get_auth_from_private_key
         require 'google/api_client/auth/key_utils'
-        private_key_path = @auth_options[:private_key_path]
-        private_key_passphrase = @auth_options[:private_key_passphrase]
-        email = @auth_options[:email]
+        private_key_path = @options[:private_key_path]
+        private_key_passphrase = @options[:private_key_passphrase]
+        email = @options[:email]
         key = Google::APIClient::KeyUtils.load_from_pkcs12(private_key_path, private_key_passphrase)
         Signet::OAuth2::Client.new(
@@ -261,7 +266,7 @@ module Fluent
       end
       def get_auth_from_json_key
-        json_key = @auth_options[:json_key]
+        json_key = @options[:json_key]
         begin
           JSON.parse(json_key)
@@ -283,8 +288,8 @@ module Fluent
         table_id.gsub(/\$\d+$/, "")
       end
-      def create_job_id(chunk_id, dataset, table, schema, max_bad_records, ignore_unknown_values)
-        job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}#{@num_errors_per_chunk[chunk_id]}"
+      def create_job_id(chunk_id, dataset, table, schema)
+        job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{@options[:max_bad_records]}#{@options[:ignore_unknown_values]}#{@num_errors_per_chunk[chunk_id]}"
         @log.debug "job_id_key: #{job_id_key}"
         "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
       end

data/lib/fluent/plugin/out_bigquery.rb CHANGED Viewed

@@ -288,6 +288,16 @@ module Fluent
         private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
         email: @email,
         json_key: @json_key,
+        skip_invalid_rows: @skip_invalid_rows,
+        ignore_unknown_values: @ignore_unknown_values,
+        max_bad_records: @max_bad_records,
+        allow_retry_insert_errors: @allow_retry_insert_errors,
+        prevent_duplicate_load: @prevent_duplicate_load,
+        auto_create_table: @auto_create_table,
+        time_partitioning_type: @time_partitioning_type,
+        time_partitioning_expiration: @time_partitioning_expiration,
+        timeout_sec: @request_timeout_sec,
+        open_timeout_sec: @request_open_timeout_sec,
       })
     end
@@ -427,11 +437,11 @@ module Fluent
       end
       def insert(table_id, rows, template_suffix)
-        writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix, allow_retry_insert_errors: @allow_retry_insert_errors)
+        writer.insert_rows(@project, @dataset, table_id, rows, template_suffix: template_suffix)
       rescue Fluent::BigQuery::Error => e
         if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
           # Table Not Found: Auto Create Table
-          writer.create_table(@project, @dataset, table_id, @fields, time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration)
+          writer.create_table(@project, @dataset, table_id, @fields)
           raise "table created. send rows next time."
         end
@@ -473,12 +483,7 @@ module Fluent
         res = nil
         create_upload_source(chunk) do |upload_source|
-          res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields, {
-            prevent_duplicate_load: @prevent_duplicate_load,
-            ignore_unknown_values: @ignore_unknown_values, max_bad_records: @max_bad_records,
-            timeout_sec: @request_timeout_sec,  open_timeout_sec: @request_open_timeout_sec, auto_create_table: @auto_create_table,
-            time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration
-          })
+          res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields)
         end
       rescue Fluent::BigQuery::Error => e
         if e.retryable?

data/test/plugin/test_out_bigquery.rb CHANGED Viewed

@@ -754,10 +754,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
     driver = create_driver
     writer = stub_writer(driver)
-    mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', entry, hash_including(
-      skip_invalid_rows: false,
-      ignore_unknown_values: false
-    ))
+    mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', entry, template_suffix: nil)
     mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
       rows: entry,
       skip_invalid_rows: false,
@@ -780,62 +777,71 @@ class BigQueryOutputTest < Test::Unit::TestCase
   def test_write_with_retryable_error
     entry = {json: {a: "b"}}, {json: {b: "c"}}
-    driver = create_driver(<<-CONFIG)
-      table foo
-      email foo@bar.example
-      private_key_path /path/to/key
-      project yourproject_id
-      dataset yourdataset_id
-      time_format %s
-      time_field  time
-      schema [
-        {"name": "time", "type": "INTEGER"},
-        {"name": "status", "type": "INTEGER"},
-        {"name": "bytes", "type": "INTEGER"},
-        {"name": "vhost", "type": "STRING"},
-        {"name": "path", "type": "STRING"},
-        {"name": "method", "type": "STRING"},
-        {"name": "protocol", "type": "STRING"},
-        {"name": "agent", "type": "STRING"},
-        {"name": "referer", "type": "STRING"},
-        {"name": "remote", "type": "RECORD", "fields": [
-          {"name": "host", "type": "STRING"},
-          {"name": "ip", "type": "STRING"},
-          {"name": "user", "type": "STRING"}
-        ]},
-        {"name": "requesttime", "type": "FLOAT"},
-        {"name": "bot_access", "type": "BOOLEAN"},
-        {"name": "loginsession", "type": "BOOLEAN"}
-      ]
-      <secondary>
-        type file
-        path error
-        utc
-      </secondary>
-    CONFIG
+    data_input = [
+      { "status_code" => 500  },
+      { "status_code" => 502  },
+      { "status_code" => 503  },
+      { "status_code" => 504  },
+    ]
-    writer = stub_writer(driver)
-    mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
-      rows: entry,
-      skip_invalid_rows: false,
-      ignore_unknown_values: false
-    }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
-      ex = Google::Apis::ServerError.new("error", status_code: 500)
-      raise ex
-    end
+    data_input.each do |d|
+      driver = create_driver(<<-CONFIG)
+        table foo
+        email foo@bar.example
+        private_key_path /path/to/key
+        project yourproject_id
+        dataset yourdataset_id
-    chunk = Fluent::MemoryBufferChunk.new("my.tag")
-    entry.each do |e|
-      chunk << e.to_msgpack
-    end
+        time_format %s
+        time_field  time
-    driver.instance.start
-    assert_raise Fluent::BigQuery::RetryableError do
-      driver.instance.write(chunk)
-    end
-    driver.instance.shutdown
+          schema [
+            {"name": "time", "type": "INTEGER"},
+            {"name": "status", "type": "INTEGER"},
+            {"name": "bytes", "type": "INTEGER"},
+            {"name": "vhost", "type": "STRING"},
+            {"name": "path", "type": "STRING"},
+            {"name": "method", "type": "STRING"},
+            {"name": "protocol", "type": "STRING"},
+            {"name": "agent", "type": "STRING"},
+            {"name": "referer", "type": "STRING"},
+            {"name": "remote", "type": "RECORD", "fields": [
+              {"name": "host", "type": "STRING"},
+              {"name": "ip", "type": "STRING"},
+              {"name": "user", "type": "STRING"}
+            ]},
+            {"name": "requesttime", "type": "FLOAT"},
+            {"name": "bot_access", "type": "BOOLEAN"},
+            {"name": "loginsession", "type": "BOOLEAN"}
+          ]
+          <secondary>
+            type file
+            path error
+            utc
+          </secondary>
+        CONFIG
+        writer = stub_writer(driver)
+        mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+          rows: entry,
+          skip_invalid_rows: false,
+          ignore_unknown_values: false
+        }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
+          ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
+          raise ex
+        end
+        chunk = Fluent::MemoryBufferChunk.new("my.tag")
+        entry.each do |e|
+          chunk << e.to_msgpack
+        end
+        driver.instance.start
+        assert_raise Fluent::BigQuery::RetryableError do
+          driver.instance.write(chunk)
+        end
+        driver.instance.shutdown
+      end
   end
   def test_write_with_not_retryable_error
@@ -1455,11 +1461,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
     CONFIG
     writer = stub_writer(driver)
-    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], hash_including(
-      skip_invalid_rows: false,
-      ignore_unknown_values: false,
-    )) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
-    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: nil, time_partitioning_expiration: nil)
+    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
+    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields))
     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     chunk << message.to_msgpack
@@ -1517,11 +1520,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_partitioning_expiration 1h
     CONFIG
     writer = stub_writer(driver)
-    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], hash_including(
-      skip_invalid_rows: false,
-      ignore_unknown_values: false,
-    )) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
-    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: :day, time_partitioning_expiration: 3600)
+    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
+    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields))
     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     chunk << message.to_msgpack

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.4.1
+  version: 0.4.2
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-03-24 00:00:00.000000000 Z
+date: 2017-03-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake