fluent-plugin-bigquery 0.4.1 → 0.4.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 75db2952171316995000122029bb4e4f3eeb0a45
-  data.tar.gz: 43d6cff7aaf69f06288f006d4f2bd97300b59dd9
+  metadata.gz: fb46d9ded6ca44476f2a241a4a08f5abff3e99f4
+  data.tar.gz: 2729484cdd6de6edbd9636f0c01eeb69c9b0368b
 SHA512:
-  metadata.gz: 0157b43c59d7ac17e50051a261cf21f036bc1c5827a6a102d4747d7f66293fb8e1f733bebcabcd652afdf2b90a9abccbf622763d3e7cc2d9b34a382d75c4adc3
-  data.tar.gz: 169d3bf4a140f4dc3e5fd77707f2ed9d45ed989b23c217555ea22ef8275a1fed36129169e7b523259d767f745cc892d31605393bc40af244448b7ca81f6d62d8
+  metadata.gz: aa84153cb3e53c093cc888f93ea211e1f6852f2f6a08ad7eab875438d7e7c0a5be8ab9b1c8b9c181d3655c981b47e756ed9adf06fbe71142f98bb9f128f773e2
+  data.tar.gz: a0fd64ab52abe46eccde000d364ce79dca01a3ae3d9dde48d36963ae4ca03bfe9e17dc913b10d5ea6706765dff2cbc0a8bc34df7b1ba1a3345accd60283478e0
@@ -4,7 +4,7 @@ module Fluent
     class Error < StandardError
       RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
       RETRYABLE_INSERT_ERRORS_REASON = %w(timeout).freeze
-      RETRYABLE_STATUS_CODE = [500, 503]
+      RETRYABLE_STATUS_CODE = [500, 502, 503, 504]
 
       class << self
         def wrap(google_api_error, message = nil, force_unretryable: false)
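Note: 502 and 504, which typically come from a proxy or gateway in front of BigQuery, are now retried like 500 and 503 instead of failing the chunk outright. A minimal sketch of the check this constant drives (the helper below is illustrative; in the gem the classification happens when `Error.wrap` decides between a retryable and an unretryable error):

    RETRYABLE_STATUS_CODE = [500, 502, 503, 504].freeze

    # Illustrative helper mirroring how the constant is consulted.
    def retryable_status?(status_code)
      RETRYABLE_STATUS_CODE.include?(status_code)
    end

    retryable_status?(502) # => true  (newly retryable in 0.4.2)
    retryable_status?(404) # => false (handled separately, e.g. auto table creation)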
@@ -1,5 +1,5 @@
 module Fluent
   module BigQueryPlugin
-    VERSION = "0.4.1".freeze
+    VERSION = "0.4.2".freeze
   end
 end
@@ -1,10 +1,10 @@
 module Fluent
   module BigQuery
     class Writer
-      def initialize(log, auth_method, auth_options = {})
+      def initialize(log, auth_method, options = {})
         @auth_method = auth_method
         @scope = "https://www.googleapis.com/auth/bigquery"
-        @auth_options = auth_options
+        @options = options
         @log = log
         @num_errors_per_chunk = {}
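This rename is the core of the 0.4.2 refactor: the hash passed to `Writer.new` now carries every request-level setting alongside the credentials, so the methods below stop threading the same keyword arguments through every call. A sketch of constructing a writer under the new signature (values illustrative; in the plugin they come from the Fluentd `<match>` configuration, wired up in the `out_bigquery` hunk further down):

    require 'logger'

    writer = Fluent::BigQuery::Writer.new(Logger.new($stdout), :json_key, {
      json_key: '/path/to/keyfile.json', # auth settings, as in 0.4.1
      skip_invalid_rows: false,          # request settings, new to this hash
      ignore_unknown_values: false,
      max_bad_records: 0,
      timeout_sec: nil,
      open_timeout_sec: 60,
    })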
 
@@ -22,7 +22,7 @@ module Fluent
         @client = client
       end
 
-      def create_table(project, dataset, table_id, record_schema, time_partitioning_type: nil, time_partitioning_expiration: nil)
+      def create_table(project, dataset, table_id, record_schema)
         create_table_retry_limit = 3
         create_table_retry_wait = 1
         create_table_retry_count = 0
@@ -38,10 +38,10 @@ module Fluent
           }
         }
 
-        if time_partitioning_type
+        if @options[:time_partitioning_type]
           definition[:time_partitioning] = {
-            type: time_partitioning_type.to_s.upcase,
-            expiration_ms: time_partitioning_expiration ? time_partitioning_expiration * 1000 : nil
+            type: @options[:time_partitioning_type].to_s.upcase,
+            expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil
           }.compact
         end
         client.insert_table(project, dataset, definition, {})
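Same behavior as before, sourced from `@options` instead of keyword arguments. For example, with `time_partitioning_type day` and `time_partitioning_expiration 1h` configured, the branch above evaluates to the following clause (plain Ruby, mirroring the hunk line for line):

    options = { time_partitioning_type: :day, time_partitioning_expiration: 3600 }

    definition = {}
    if options[:time_partitioning_type]
      definition[:time_partitioning] = {
        type: options[:time_partitioning_type].to_s.upcase,
        expiration_ms: options[:time_partitioning_expiration] ? options[:time_partitioning_expiration] * 1000 : nil
      }.compact # drops :expiration_ms when no expiration is configured
    end
    definition # => {:time_partitioning=>{:type=>"DAY", :expiration_ms=>3600000}}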
@@ -84,21 +84,21 @@ module Fluent
         nil
       end
 
-      def insert_rows(project, dataset, table_id, rows, skip_invalid_rows: false, ignore_unknown_values: false, template_suffix: nil, timeout_sec: nil, open_timeout_sec: 60, allow_retry_insert_errors: false)
+      def insert_rows(project, dataset, table_id, rows, template_suffix: nil)
         body = {
           rows: rows,
-          skip_invalid_rows: skip_invalid_rows,
-          ignore_unknown_values: ignore_unknown_values,
+          skip_invalid_rows: @options[:skip_invalid_rows],
+          ignore_unknown_values: @options[:ignore_unknown_values],
         }
         body.merge!(template_suffix: template_suffix) if template_suffix
         res = client.insert_all_table_data(project, dataset, table_id, body, {
-          options: {timeout_sec: timeout_sec, open_timeout_sec: open_timeout_sec}
+          options: {timeout_sec: @options[:timeout_sec], open_timeout_sec: @options[:open_timeout_sec]}
         })
         log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
 
         if res.insert_errors && !res.insert_errors.empty?
           log.warn "insert errors", project_id: project, dataset: dataset, table: table_id, insert_errors: res.insert_errors.to_s
-          if allow_retry_insert_errors
+          if @options[:allow_retry_insert_errors]
             is_included_any_retryable_insert_error = res.insert_errors.any? do |insert_error|
               insert_error.errors.any? { |error| Fluent::BigQuery::Error.retryable_insert_errors_reason?(error.reason) }
             end
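Call sites shrink to match: only the rows and the per-call `template_suffix` remain as arguments; everything else is read from the `@options` set at construction. Before/after sketch (receiver and argument values illustrative):

    # 0.4.1: request-level knobs repeated on every call
    writer.insert_rows(project, dataset, table_id, rows,
                       skip_invalid_rows: false, ignore_unknown_values: false,
                       template_suffix: nil, allow_retry_insert_errors: false)

    # 0.4.2: configured once in Writer.new, so only per-call data remains
    writer.insert_rows(project, dataset, table_id, rows, template_suffix: nil)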
@@ -118,7 +118,7 @@ module Fluent
         raise Fluent::BigQuery::Error.wrap(e)
       end
 
-      def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields, prevent_duplicate_load: false, ignore_unknown_values: false, max_bad_records: 0, timeout_sec: nil, open_timeout_sec: 60, auto_create_table: nil, time_partitioning_type: nil, time_partitioning_expiration: nil)
+      def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields)
         configuration = {
           configuration: {
             load: {
@@ -132,14 +132,14 @@ module Fluent
              },
              write_disposition: "WRITE_APPEND",
              source_format: "NEWLINE_DELIMITED_JSON",
-              ignore_unknown_values: ignore_unknown_values,
-              max_bad_records: max_bad_records,
+              ignore_unknown_values: @options[:ignore_unknown_values],
+              max_bad_records: @options[:max_bad_records],
            }
          }
        }
 
-        job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a, max_bad_records, ignore_unknown_values) if prevent_duplicate_load
-        configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if time_partitioning_type
+        job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
+        configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
         configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
 
         # If target table is already exist, omit schema configuration.
@@ -159,8 +159,8 @@ module Fluent
            upload_source: upload_source,
            content_type: "application/octet-stream",
            options: {
-              timeout_sec: timeout_sec,
-              open_timeout_sec: open_timeout_sec,
+              timeout_sec: @options[:timeout_sec],
+              open_timeout_sec: @options[:open_timeout_sec],
            }
          }
        )
@@ -172,14 +172,19 @@ module Fluent
         reason = e.respond_to?(:reason) ? e.reason : nil
         log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
 
-        if auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
+        if @options[:auto_create_table] && e.status_code == 404 && /Not Found: Table/i =~ e.message
           # Table Not Found: Auto Create Table
-          create_table(project, dataset, table_id, fields, time_partitioning_type: time_partitioning_type, time_partitioning_expiration: time_partitioning_expiration)
+          create_table(
+            project,
+            dataset,
+            table_id,
+            fields,
+          )
           raise "table created. send rows next time."
         end
 
         if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
-          wait_load_job(chunk_id, project, dataset, job_id, table_id)
+          wait_load_job(chunk_id, project, dataset, job_id, table_id)
           @num_errors_per_chunk.delete(chunk_id)
           return
         end
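The reformatted `create_table` call is the same auto-create flow, minus the partitioning keywords it no longer needs. The pattern itself is worth noting: the writer creates the missing table and then raises anyway, so the buffer keeps the chunk and re-sends it on a later retry, once the new table is actually usable. A self-contained sketch of that pattern (the names here are illustrative, not the gem's API):

    # Create-then-retry: turn "table missing" into "table created, retry the chunk".
    TableMissing = Class.new(StandardError)

    def write_chunk(auto_create_table)
      raise TableMissing, "Not Found: Table demo" # stands in for BigQuery's 404
    rescue TableMissing => e
      raise unless auto_create_table && e.message =~ /Not Found: Table/i
      # create_table(...) would run here
      raise "table created. send rows next time." # re-raise so the chunk is retried
    end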
@@ -242,9 +247,9 @@ module Fluent
 
      def get_auth_from_private_key
        require 'google/api_client/auth/key_utils'
-        private_key_path = @auth_options[:private_key_path]
-        private_key_passphrase = @auth_options[:private_key_passphrase]
-        email = @auth_options[:email]
+        private_key_path = @options[:private_key_path]
+        private_key_passphrase = @options[:private_key_passphrase]
+        email = @options[:email]
 
        key = Google::APIClient::KeyUtils.load_from_pkcs12(private_key_path, private_key_passphrase)
        Signet::OAuth2::Client.new(
@@ -261,7 +266,7 @@ module Fluent
      end
 
      def get_auth_from_json_key
-        json_key = @auth_options[:json_key]
+        json_key = @options[:json_key]
 
        begin
          JSON.parse(json_key)
@@ -283,8 +288,8 @@ module Fluent
        table_id.gsub(/\$\d+$/, "")
      end
 
-      def create_job_id(chunk_id, dataset, table, schema, max_bad_records, ignore_unknown_values)
-        job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}#{@num_errors_per_chunk[chunk_id]}"
+      def create_job_id(chunk_id, dataset, table, schema)
+        job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{@options[:max_bad_records]}#{@options[:ignore_unknown_values]}#{@num_errors_per_chunk[chunk_id]}"
        @log.debug "job_id_key: #{job_id_key}"
        "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
      end
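The two dropped parameters reappear inside the key via `@options`, so the important property survives: the job id is a pure function of the chunk, its destination, and the load settings. Because BigQuery rejects a second job with an id it has already seen (the 409 handled in the rescue above), a retried load becomes a harmless duplicate instead of loading the rows twice; `@num_errors_per_chunk` rotates the id only after a failed attempt. Sketch of the derivation (key contents illustrative):

    require 'digest/sha1'

    # Same chunk + destination + settings => same job id on every retry.
    job_id_key = "CHUNKIDyourdataset_idfoo[]0false0"
    "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)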
@@ -288,6 +288,16 @@ module Fluent
        private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
        email: @email,
        json_key: @json_key,
+        skip_invalid_rows: @skip_invalid_rows,
+        ignore_unknown_values: @ignore_unknown_values,
+        max_bad_records: @max_bad_records,
+        allow_retry_insert_errors: @allow_retry_insert_errors,
+        prevent_duplicate_load: @prevent_duplicate_load,
+        auto_create_table: @auto_create_table,
+        time_partitioning_type: @time_partitioning_type,
+        time_partitioning_expiration: @time_partitioning_expiration,
+        timeout_sec: @request_timeout_sec,
+        open_timeout_sec: @request_open_timeout_sec,
      })
    end
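This is the other half of the refactor: `out_bigquery` hands every request-level setting to the writer once, at construction, instead of repeating them at each call site below. The only renames are the plugin parameters `request_timeout_sec`/`request_open_timeout_sec`, which become the writer's `timeout_sec`/`open_timeout_sec`. For orientation, a hedged reconstruction of the surrounding method (the diff shows only the hash body; the memoizing `def writer` wrapper is assumed from context):

    def writer
      @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method, {
        # auth settings (unchanged context lines above)
        private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
        email: @email,
        json_key: @json_key,
        # request settings (added in 0.4.2)
        skip_invalid_rows: @skip_invalid_rows,
        ignore_unknown_values: @ignore_unknown_values,
        max_bad_records: @max_bad_records,
        allow_retry_insert_errors: @allow_retry_insert_errors,
        prevent_duplicate_load: @prevent_duplicate_load,
        auto_create_table: @auto_create_table,
        time_partitioning_type: @time_partitioning_type,
        time_partitioning_expiration: @time_partitioning_expiration,
        timeout_sec: @request_timeout_sec,
        open_timeout_sec: @request_open_timeout_sec,
      })
    end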
 
@@ -427,11 +437,11 @@ module Fluent
    end
 
    def insert(table_id, rows, template_suffix)
-      writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix, allow_retry_insert_errors: @allow_retry_insert_errors)
+      writer.insert_rows(@project, @dataset, table_id, rows, template_suffix: template_suffix)
    rescue Fluent::BigQuery::Error => e
      if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
        # Table Not Found: Auto Create Table
-        writer.create_table(@project, @dataset, table_id, @fields, time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration)
+        writer.create_table(@project, @dataset, table_id, @fields)
        raise "table created. send rows next time."
      end
 
@@ -473,12 +483,7 @@ module Fluent
      res = nil
 
      create_upload_source(chunk) do |upload_source|
-        res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields, {
-          prevent_duplicate_load: @prevent_duplicate_load,
-          ignore_unknown_values: @ignore_unknown_values, max_bad_records: @max_bad_records,
-          timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec, auto_create_table: @auto_create_table,
-          time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration
-        })
+        res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields)
      end
    rescue Fluent::BigQuery::Error => e
      if e.retryable?
@@ -754,10 +754,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
    driver = create_driver
 
    writer = stub_writer(driver)
-    mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', entry, hash_including(
-      skip_invalid_rows: false,
-      ignore_unknown_values: false
-    ))
+    mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', entry, template_suffix: nil)
    mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
      rows: entry,
      skip_invalid_rows: false,
@@ -780,62 +777,71 @@ class BigQueryOutputTest < Test::Unit::TestCase
 
  def test_write_with_retryable_error
    entry = {json: {a: "b"}}, {json: {b: "c"}}
-    driver = create_driver(<<-CONFIG)
-      table foo
-      email foo@bar.example
-      private_key_path /path/to/key
-      project yourproject_id
-      dataset yourdataset_id
-
-      time_format %s
-      time_field time
-
-      schema [
-        {"name": "time", "type": "INTEGER"},
-        {"name": "status", "type": "INTEGER"},
-        {"name": "bytes", "type": "INTEGER"},
-        {"name": "vhost", "type": "STRING"},
-        {"name": "path", "type": "STRING"},
-        {"name": "method", "type": "STRING"},
-        {"name": "protocol", "type": "STRING"},
-        {"name": "agent", "type": "STRING"},
-        {"name": "referer", "type": "STRING"},
-        {"name": "remote", "type": "RECORD", "fields": [
-          {"name": "host", "type": "STRING"},
-          {"name": "ip", "type": "STRING"},
-          {"name": "user", "type": "STRING"}
-        ]},
-        {"name": "requesttime", "type": "FLOAT"},
-        {"name": "bot_access", "type": "BOOLEAN"},
-        {"name": "loginsession", "type": "BOOLEAN"}
-      ]
-      <secondary>
-        type file
-        path error
-        utc
-      </secondary>
-    CONFIG
+    data_input = [
+      { "status_code" => 500 },
+      { "status_code" => 502 },
+      { "status_code" => 503 },
+      { "status_code" => 504 },
+    ]
 
-    writer = stub_writer(driver)
-    mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
-      rows: entry,
-      skip_invalid_rows: false,
-      ignore_unknown_values: false
-    }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
-      ex = Google::Apis::ServerError.new("error", status_code: 500)
-      raise ex
-    end
+    data_input.each do |d|
+      driver = create_driver(<<-CONFIG)
+        table foo
+        email foo@bar.example
+        private_key_path /path/to/key
+        project yourproject_id
+        dataset yourdataset_id
 
-    chunk = Fluent::MemoryBufferChunk.new("my.tag")
-    entry.each do |e|
-      chunk << e.to_msgpack
-    end
+        time_format %s
+        time_field time
 
-    driver.instance.start
-    assert_raise Fluent::BigQuery::RetryableError do
-      driver.instance.write(chunk)
-    end
-    driver.instance.shutdown
+        schema [
+          {"name": "time", "type": "INTEGER"},
+          {"name": "status", "type": "INTEGER"},
+          {"name": "bytes", "type": "INTEGER"},
+          {"name": "vhost", "type": "STRING"},
+          {"name": "path", "type": "STRING"},
+          {"name": "method", "type": "STRING"},
+          {"name": "protocol", "type": "STRING"},
+          {"name": "agent", "type": "STRING"},
+          {"name": "referer", "type": "STRING"},
+          {"name": "remote", "type": "RECORD", "fields": [
+            {"name": "host", "type": "STRING"},
+            {"name": "ip", "type": "STRING"},
+            {"name": "user", "type": "STRING"}
+          ]},
+          {"name": "requesttime", "type": "FLOAT"},
+          {"name": "bot_access", "type": "BOOLEAN"},
+          {"name": "loginsession", "type": "BOOLEAN"}
+        ]
+        <secondary>
+          type file
+          path error
+          utc
+        </secondary>
+      CONFIG
+
+      writer = stub_writer(driver)
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+        rows: entry,
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
+        ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
+        raise ex
+      end
+
+      chunk = Fluent::MemoryBufferChunk.new("my.tag")
+      entry.each do |e|
+        chunk << e.to_msgpack
+      end
+
+      driver.instance.start
+      assert_raise Fluent::BigQuery::RetryableError do
+        driver.instance.write(chunk)
+      end
+      driver.instance.shutdown
+    end
  end
 
  def test_write_with_not_retryable_error
@@ -1455,11 +1461,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
    CONFIG
    writer = stub_writer(driver)
-    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], hash_including(
-      skip_invalid_rows: false,
-      ignore_unknown_values: false,
-    )) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
-    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: nil, time_partitioning_expiration: nil)
+    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
+    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields))
 
    chunk = Fluent::MemoryBufferChunk.new("my.tag")
    chunk << message.to_msgpack
@@ -1517,11 +1520,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_partitioning_expiration 1h
    CONFIG
    writer = stub_writer(driver)
-    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], hash_including(
-      skip_invalid_rows: false,
-      ignore_unknown_values: false,
-    )) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
-    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: :day, time_partitioning_expiration: 3600)
+    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
+    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields))
 
    chunk = Fluent::MemoryBufferChunk.new("my.tag")
    chunk << message.to_msgpack
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.4.1
+  version: 0.4.2
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-03-24 00:00:00.000000000 Z
+date: 2017-03-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake