fluent-plugin-bigquery 0.4.1 → 0.4.2

This diff shows the content of the two publicly released package versions as they appear in their respective registries, and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 75db2952171316995000122029bb4e4f3eeb0a45
- data.tar.gz: 43d6cff7aaf69f06288f006d4f2bd97300b59dd9
+ metadata.gz: fb46d9ded6ca44476f2a241a4a08f5abff3e99f4
+ data.tar.gz: 2729484cdd6de6edbd9636f0c01eeb69c9b0368b
  SHA512:
- metadata.gz: 0157b43c59d7ac17e50051a261cf21f036bc1c5827a6a102d4747d7f66293fb8e1f733bebcabcd652afdf2b90a9abccbf622763d3e7cc2d9b34a382d75c4adc3
- data.tar.gz: 169d3bf4a140f4dc3e5fd77707f2ed9d45ed989b23c217555ea22ef8275a1fed36129169e7b523259d767f745cc892d31605393bc40af244448b7ca81f6d62d8
+ metadata.gz: aa84153cb3e53c093cc888f93ea211e1f6852f2f6a08ad7eab875438d7e7c0a5be8ab9b1c8b9c181d3655c981b47e756ed9adf06fbe71142f98bb9f128f773e2
+ data.tar.gz: a0fd64ab52abe46eccde000d364ce79dca01a3ae3d9dde48d36963ae4ca03bfe9e17dc913b10d5ea6706765dff2cbc0a8bc34df7b1ba1a3345accd60283478e0
@@ -4,7 +4,7 @@ module Fluent
  class Error < StandardError
  RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
  RETRYABLE_INSERT_ERRORS_REASON = %w(timeout).freeze
- RETRYABLE_STATUS_CODE = [500, 503]
+ RETRYABLE_STATUS_CODE = [500, 502, 503, 504]

  class << self
  def wrap(google_api_error, message = nil, force_unretryable: false)
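The only functional change in this hunk is that HTTP 502 and 504 now count as retryable server responses alongside 500 and 503. As a hedged illustration (the helper below is hypothetical and not the gem's code; the real classification happens inside Error.wrap and the retryable error classes), the widened constant simply changes which status codes a caller is allowed to retry:

# Hypothetical sketch: shows the effect of widening RETRYABLE_STATUS_CODE.
RETRYABLE_STATUS_CODE = [500, 502, 503, 504].freeze

def retryable_status?(status_code)
  RETRYABLE_STATUS_CODE.include?(status_code)
end

retryable_status?(502) # => true in 0.4.2; 0.4.1's [500, 503] treated it as non-retryable
retryable_status?(404) # => false in both versions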
@@ -1,5 +1,5 @@
  module Fluent
  module BigQueryPlugin
- VERSION = "0.4.1".freeze
+ VERSION = "0.4.2".freeze
  end
  end
@@ -1,10 +1,10 @@
  module Fluent
  module BigQuery
  class Writer
- def initialize(log, auth_method, auth_options = {})
+ def initialize(log, auth_method, options = {})
  @auth_method = auth_method
  @scope = "https://www.googleapis.com/auth/bigquery"
- @auth_options = auth_options
+ @options = options
  @log = log
  @num_errors_per_chunk = {}

@@ -22,7 +22,7 @@ module Fluent
  @client = client
  end

- def create_table(project, dataset, table_id, record_schema, time_partitioning_type: nil, time_partitioning_expiration: nil)
+ def create_table(project, dataset, table_id, record_schema)
  create_table_retry_limit = 3
  create_table_retry_wait = 1
  create_table_retry_count = 0
@@ -38,10 +38,10 @@ module Fluent
  }
  }

- if time_partitioning_type
+ if @options[:time_partitioning_type]
  definition[:time_partitioning] = {
- type: time_partitioning_type.to_s.upcase,
- expiration_ms: time_partitioning_expiration ? time_partitioning_expiration * 1000 : nil
+ type: @options[:time_partitioning_type].to_s.upcase,
+ expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil
  }.compact
  end
  client.insert_table(project, dataset, definition, {})
@@ -84,21 +84,21 @@ module Fluent
  nil
  end

- def insert_rows(project, dataset, table_id, rows, skip_invalid_rows: false, ignore_unknown_values: false, template_suffix: nil, timeout_sec: nil, open_timeout_sec: 60, allow_retry_insert_errors: false)
+ def insert_rows(project, dataset, table_id, rows, template_suffix: nil)
  body = {
  rows: rows,
- skip_invalid_rows: skip_invalid_rows,
- ignore_unknown_values: ignore_unknown_values,
+ skip_invalid_rows: @options[:skip_invalid_rows],
+ ignore_unknown_values: @options[:ignore_unknown_values],
  }
  body.merge!(template_suffix: template_suffix) if template_suffix
  res = client.insert_all_table_data(project, dataset, table_id, body, {
- options: {timeout_sec: timeout_sec, open_timeout_sec: open_timeout_sec}
+ options: {timeout_sec: @options[:timeout_sec], open_timeout_sec: @options[:open_timeout_sec]}
  })
  log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size

  if res.insert_errors && !res.insert_errors.empty?
  log.warn "insert errors", project_id: project, dataset: dataset, table: table_id, insert_errors: res.insert_errors.to_s
- if allow_retry_insert_errors
+ if @options[:allow_retry_insert_errors]
  is_included_any_retryable_insert_error = res.insert_errors.any? do |insert_error|
  insert_error.errors.any? { |error| Fluent::BigQuery::Error.retryable_insert_errors_reason?(error.reason) }
  end
@@ -118,7 +118,7 @@ module Fluent
  raise Fluent::BigQuery::Error.wrap(e)
  end

- def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields, prevent_duplicate_load: false, ignore_unknown_values: false, max_bad_records: 0, timeout_sec: nil, open_timeout_sec: 60, auto_create_table: nil, time_partitioning_type: nil, time_partitioning_expiration: nil)
+ def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields)
  configuration = {
  configuration: {
  load: {
@@ -132,14 +132,14 @@ module Fluent
  },
  write_disposition: "WRITE_APPEND",
  source_format: "NEWLINE_DELIMITED_JSON",
- ignore_unknown_values: ignore_unknown_values,
- max_bad_records: max_bad_records,
+ ignore_unknown_values: @options[:ignore_unknown_values],
+ max_bad_records: @options[:max_bad_records],
  }
  }
  }

- job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a, max_bad_records, ignore_unknown_values) if prevent_duplicate_load
- configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if time_partitioning_type
+ job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
+ configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
  configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id

  # If target table is already exist, omit schema configuration.
@@ -159,8 +159,8 @@ module Fluent
  upload_source: upload_source,
  content_type: "application/octet-stream",
  options: {
- timeout_sec: timeout_sec,
- open_timeout_sec: open_timeout_sec,
+ timeout_sec: @options[:timeout_sec],
+ open_timeout_sec: @options[:open_timeout_sec],
  }
  }
  )
@@ -172,14 +172,19 @@ module Fluent
  reason = e.respond_to?(:reason) ? e.reason : nil
  log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason

- if auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
+ if @options[:auto_create_table] && e.status_code == 404 && /Not Found: Table/i =~ e.message
  # Table Not Found: Auto Create Table
- create_table(project, dataset, table_id, fields, time_partitioning_type: time_partitioning_type, time_partitioning_expiration: time_partitioning_expiration)
+ create_table(
+ project,
+ dataset,
+ table_id,
+ fields,
+ )
  raise "table created. send rows next time."
  end

  if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
- wait_load_job(chunk_id, project, dataset, job_id, table_id)
+ wait_load_job(chunk_id, project, dataset, job_id, table_id)
  @num_errors_per_chunk.delete(chunk_id)
  return
  end
@@ -242,9 +247,9 @@ module Fluent

  def get_auth_from_private_key
  require 'google/api_client/auth/key_utils'
- private_key_path = @auth_options[:private_key_path]
- private_key_passphrase = @auth_options[:private_key_passphrase]
- email = @auth_options[:email]
+ private_key_path = @options[:private_key_path]
+ private_key_passphrase = @options[:private_key_passphrase]
+ email = @options[:email]

  key = Google::APIClient::KeyUtils.load_from_pkcs12(private_key_path, private_key_passphrase)
  Signet::OAuth2::Client.new(
@@ -261,7 +266,7 @@ module Fluent
  end

  def get_auth_from_json_key
- json_key = @auth_options[:json_key]
+ json_key = @options[:json_key]

  begin
  JSON.parse(json_key)
@@ -283,8 +288,8 @@ module Fluent
  table_id.gsub(/\$\d+$/, "")
  end

- def create_job_id(chunk_id, dataset, table, schema, max_bad_records, ignore_unknown_values)
- job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}#{@num_errors_per_chunk[chunk_id]}"
+ def create_job_id(chunk_id, dataset, table, schema)
+ job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{@options[:max_bad_records]}#{@options[:ignore_unknown_values]}#{@num_errors_per_chunk[chunk_id]}"
  @log.debug "job_id_key: #{job_id_key}"
  "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
  end
@@ -288,6 +288,16 @@ module Fluent
  private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
  email: @email,
  json_key: @json_key,
+ skip_invalid_rows: @skip_invalid_rows,
+ ignore_unknown_values: @ignore_unknown_values,
+ max_bad_records: @max_bad_records,
+ allow_retry_insert_errors: @allow_retry_insert_errors,
+ prevent_duplicate_load: @prevent_duplicate_load,
+ auto_create_table: @auto_create_table,
+ time_partitioning_type: @time_partitioning_type,
+ time_partitioning_expiration: @time_partitioning_expiration,
+ timeout_sec: @request_timeout_sec,
+ open_timeout_sec: @request_open_timeout_sec,
  })
  end

@@ -427,11 +437,11 @@ module Fluent
  end

  def insert(table_id, rows, template_suffix)
- writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix, allow_retry_insert_errors: @allow_retry_insert_errors)
+ writer.insert_rows(@project, @dataset, table_id, rows, template_suffix: template_suffix)
  rescue Fluent::BigQuery::Error => e
  if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
  # Table Not Found: Auto Create Table
- writer.create_table(@project, @dataset, table_id, @fields, time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration)
+ writer.create_table(@project, @dataset, table_id, @fields)
  raise "table created. send rows next time."
  end

@@ -473,12 +483,7 @@ module Fluent
  res = nil

  create_upload_source(chunk) do |upload_source|
- res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields, {
- prevent_duplicate_load: @prevent_duplicate_load,
- ignore_unknown_values: @ignore_unknown_values, max_bad_records: @max_bad_records,
- timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec, auto_create_table: @auto_create_table,
- time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration
- })
+ res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields)
  end
  rescue Fluent::BigQuery::Error => e
  if e.retryable?
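Taken together with the writer.rb hunks above, this release moves every per-request setting out of the individual method calls and into the options hash handed to Writer#initialize, which is why insert_rows, create_table, and create_load_job shrink to their positional arguments (plus template_suffix for streaming inserts). A rough sketch of the resulting shape, with placeholder values and an assumed "json_key" auth method rather than anything taken from the gem's defaults:

# Sketch only: the option keys are the ones visible in this diff; values are placeholders.
writer = Fluent::BigQuery::Writer.new(log, "json_key", {
  json_key: "/path/to/service_account.json",
  skip_invalid_rows: false,
  ignore_unknown_values: false,
  max_bad_records: 0,
  allow_retry_insert_errors: false,
  prevent_duplicate_load: false,
  auto_create_table: false,
  time_partitioning_type: nil,
  time_partitioning_expiration: nil,
  timeout_sec: nil,
  open_timeout_sec: 60,
})

# The per-call keyword arguments are gone; the writer reads @options instead.
writer.insert_rows(project, dataset, table_id, rows, template_suffix: nil)
writer.create_load_job(chunk_id, project, dataset, table_id, upload_source, fields)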
@@ -754,10 +754,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
  driver = create_driver

  writer = stub_writer(driver)
- mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', entry, hash_including(
- skip_invalid_rows: false,
- ignore_unknown_values: false
- ))
+ mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', entry, template_suffix: nil)
  mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
  rows: entry,
  skip_invalid_rows: false,
@@ -780,62 +777,71 @@ class BigQueryOutputTest < Test::Unit::TestCase

  def test_write_with_retryable_error
  entry = {json: {a: "b"}}, {json: {b: "c"}}
- driver = create_driver(<<-CONFIG)
- table foo
- email foo@bar.example
- private_key_path /path/to/key
- project yourproject_id
- dataset yourdataset_id
-
- time_format %s
- time_field time
-
- schema [
- {"name": "time", "type": "INTEGER"},
- {"name": "status", "type": "INTEGER"},
- {"name": "bytes", "type": "INTEGER"},
- {"name": "vhost", "type": "STRING"},
- {"name": "path", "type": "STRING"},
- {"name": "method", "type": "STRING"},
- {"name": "protocol", "type": "STRING"},
- {"name": "agent", "type": "STRING"},
- {"name": "referer", "type": "STRING"},
- {"name": "remote", "type": "RECORD", "fields": [
- {"name": "host", "type": "STRING"},
- {"name": "ip", "type": "STRING"},
- {"name": "user", "type": "STRING"}
- ]},
- {"name": "requesttime", "type": "FLOAT"},
- {"name": "bot_access", "type": "BOOLEAN"},
- {"name": "loginsession", "type": "BOOLEAN"}
- ]
- <secondary>
- type file
- path error
- utc
- </secondary>
- CONFIG
+ data_input = [
+ { "status_code" => 500 },
+ { "status_code" => 502 },
+ { "status_code" => 503 },
+ { "status_code" => 504 },
+ ]

- writer = stub_writer(driver)
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
- rows: entry,
- skip_invalid_rows: false,
- ignore_unknown_values: false
- }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
- ex = Google::Apis::ServerError.new("error", status_code: 500)
- raise ex
- end
+ data_input.each do |d|
+ driver = create_driver(<<-CONFIG)
+ table foo
+ email foo@bar.example
+ private_key_path /path/to/key
+ project yourproject_id
+ dataset yourdataset_id

- chunk = Fluent::MemoryBufferChunk.new("my.tag")
- entry.each do |e|
- chunk << e.to_msgpack
- end
+ time_format %s
+ time_field time

- driver.instance.start
- assert_raise Fluent::BigQuery::RetryableError do
- driver.instance.write(chunk)
- end
- driver.instance.shutdown
+ schema [
+ {"name": "time", "type": "INTEGER"},
+ {"name": "status", "type": "INTEGER"},
+ {"name": "bytes", "type": "INTEGER"},
+ {"name": "vhost", "type": "STRING"},
+ {"name": "path", "type": "STRING"},
+ {"name": "method", "type": "STRING"},
+ {"name": "protocol", "type": "STRING"},
+ {"name": "agent", "type": "STRING"},
+ {"name": "referer", "type": "STRING"},
+ {"name": "remote", "type": "RECORD", "fields": [
+ {"name": "host", "type": "STRING"},
+ {"name": "ip", "type": "STRING"},
+ {"name": "user", "type": "STRING"}
+ ]},
+ {"name": "requesttime", "type": "FLOAT"},
+ {"name": "bot_access", "type": "BOOLEAN"},
+ {"name": "loginsession", "type": "BOOLEAN"}
+ ]
+ <secondary>
+ type file
+ path error
+ utc
+ </secondary>
+ CONFIG
+
+ writer = stub_writer(driver)
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+ rows: entry,
+ skip_invalid_rows: false,
+ ignore_unknown_values: false
+ }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
+ ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
+ raise ex
+ end
+
+ chunk = Fluent::MemoryBufferChunk.new("my.tag")
+ entry.each do |e|
+ chunk << e.to_msgpack
+ end
+
+ driver.instance.start
+ assert_raise Fluent::BigQuery::RetryableError do
+ driver.instance.write(chunk)
+ end
+ driver.instance.shutdown
+ end
  end

  def test_write_with_not_retryable_error
@@ -1455,11 +1461,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
  schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
  CONFIG
  writer = stub_writer(driver)
- mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], hash_including(
- skip_invalid_rows: false,
- ignore_unknown_values: false,
- )) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
- mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: nil, time_partitioning_expiration: nil)
+ mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
+ mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields))

  chunk = Fluent::MemoryBufferChunk.new("my.tag")
  chunk << message.to_msgpack
@@ -1517,11 +1520,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
  time_partitioning_expiration 1h
  CONFIG
  writer = stub_writer(driver)
- mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], hash_including(
- skip_invalid_rows: false,
- ignore_unknown_values: false,
- )) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
- mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: :day, time_partitioning_expiration: 3600)
+ mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
+ mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields))

  chunk = Fluent::MemoryBufferChunk.new("my.tag")
  chunk << message.to_msgpack
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-bigquery
  version: !ruby/object:Gem::Version
- version: 0.4.1
+ version: 0.4.2
  platform: ruby
  authors:
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-03-24 00:00:00.000000000 Z
+ date: 2017-03-27 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rake