fluent-plugin-bigquery 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/fluent-plugin-bigquery.gemspec +2 -2
- data/lib/fluent/plugin/bigquery/errors.rb +83 -0
- data/lib/fluent/plugin/bigquery/version.rb +1 -2
- data/lib/fluent/plugin/bigquery/writer.rb +8 -55
- data/lib/fluent/plugin/out_bigquery.rb +3 -6
- data/test/plugin/test_out_bigquery.rb +6 -9
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 66cdf151a1711b82f972e108d736638c017637ca
|
4
|
+
data.tar.gz: 29de9f545e896319d26105753a8569a60d411441
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e413ccab59cb232dae2b2808bb4637df291eb96a0540a98387aff8397a0fb39b0f7585c6c0d565d6f2d5851b516737e7308deb9c11572a303af55832f49cd13
|
7
|
+
data.tar.gz: f7fdfa22e82ff9f5748782e8b62429c005467bfec9ff3ea0133b2cd53f71359d90789275d42859b96f2d583830a1a6160916a0452ad9efa5a57c559fe07af366
|
@@ -6,8 +6,8 @@ require 'fluent/plugin/bigquery/version'
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "fluent-plugin-bigquery"
|
8
8
|
spec.version = Fluent::BigQueryPlugin::VERSION
|
9
|
-
spec.authors = ["Naoya Ito"]
|
10
|
-
spec.email = ["i.naoya@gmail.com"]
|
9
|
+
spec.authors = ["Naoya Ito", "joker1007"]
|
10
|
+
spec.email = ["i.naoya@gmail.com", "kakyoin.hierophant@gmail.com"]
|
11
11
|
spec.description = %q{Fluentd plugin to store data on Google BigQuery, by load, or by stream inserts}
|
12
12
|
spec.summary = %q{Fluentd plugin to store data on Google BigQuery}
|
13
13
|
spec.homepage = "https://github.com/kaizenplatform/fluent-plugin-bigquery"
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module Fluent
|
2
|
+
module BigQuery
|
3
|
+
# @abstract
|
4
|
+
class Error < StandardError
|
5
|
+
RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
|
6
|
+
RETRYABLE_STATUS_CODE = [500, 503]
|
7
|
+
|
8
|
+
class << self
|
9
|
+
def wrap(google_api_error, message = nil, force_unretryable: false)
|
10
|
+
e = google_api_error
|
11
|
+
return UnRetryableError.new(message, e) if force_unretryable
|
12
|
+
|
13
|
+
if retryable_error?(e)
|
14
|
+
RetryableError.new(message, e)
|
15
|
+
else
|
16
|
+
UnRetryableError.new(message, e)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def retryable_error?(google_api_error)
|
21
|
+
e = google_api_error
|
22
|
+
reason = e.respond_to?(:reason) ? e.reason : nil
|
23
|
+
|
24
|
+
retryable_error_reason?(reason) ||
|
25
|
+
(e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code))
|
26
|
+
end
|
27
|
+
|
28
|
+
def retryable_error_reason?(reason)
|
29
|
+
RETRYABLE_ERROR_REASON.include?(reason)
|
30
|
+
end
|
31
|
+
|
32
|
+
# Guard for instantiation
|
33
|
+
private :new
|
34
|
+
def inherited(subclass)
|
35
|
+
subclass.class_eval do
|
36
|
+
class << self
|
37
|
+
public :new
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
attr_reader :origin
|
44
|
+
|
45
|
+
def initialize(message, origin = nil)
|
46
|
+
@origin = origin
|
47
|
+
super(message || origin.message)
|
48
|
+
end
|
49
|
+
|
50
|
+
def method_missing(name, *args)
|
51
|
+
if @origin
|
52
|
+
@origin.send(name, *args)
|
53
|
+
else
|
54
|
+
super
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def reason
|
59
|
+
@origin && @origin.respond_to?(:reason) ? @origin.reason : nil
|
60
|
+
end
|
61
|
+
|
62
|
+
def status_code
|
63
|
+
@origin && @origin.respond_to?(:status_code) ? @origin.status_code : nil
|
64
|
+
end
|
65
|
+
|
66
|
+
def body
|
67
|
+
@origin && @origin.respond_to?(:body) ? @origin.body : nil
|
68
|
+
end
|
69
|
+
|
70
|
+
def retryable?
|
71
|
+
false
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
class UnRetryableError < Error; end
|
76
|
+
|
77
|
+
class RetryableError < Error
|
78
|
+
def retryable?
|
79
|
+
true
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -1,45 +1,6 @@
|
|
1
1
|
module Fluent
|
2
2
|
module BigQuery
|
3
3
|
class Writer
|
4
|
-
RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
|
5
|
-
|
6
|
-
class Error < StandardError
|
7
|
-
attr_reader :origin
|
8
|
-
|
9
|
-
def initialize(message, origin = nil)
|
10
|
-
@origin = origin
|
11
|
-
super(message || origin.message)
|
12
|
-
end
|
13
|
-
|
14
|
-
def method_missing(name, *args)
|
15
|
-
if @origin
|
16
|
-
@origin.send(name, *args)
|
17
|
-
else
|
18
|
-
super
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
def reason
|
23
|
-
@origin && @origin.respond_to?(:reason) ? @origin.reason : nil
|
24
|
-
end
|
25
|
-
|
26
|
-
def status_code
|
27
|
-
@origin && @origin.respond_to?(:status_code) ? @origin.status_code : nil
|
28
|
-
end
|
29
|
-
|
30
|
-
def retryable?
|
31
|
-
false
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
class UnRetryableError < Error; end
|
36
|
-
|
37
|
-
class RetryableError < Error
|
38
|
-
def retryable?
|
39
|
-
true
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
4
|
def initialize(log, auth_method, auth_options = {})
|
44
5
|
@auth_method = auth_method
|
45
6
|
@scope = "https://www.googleapis.com/auth/bigquery"
|
@@ -97,13 +58,13 @@ module Fluent
|
|
97
58
|
reason = e.respond_to?(:reason) ? e.reason : nil
|
98
59
|
log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message, reason: reason
|
99
60
|
|
100
|
-
if RETRYABLE_ERROR_REASON.include?(reason) && create_table_retry_count < create_table_retry_limit
|
61
|
+
if Fluent::BigQuery::Error.retryable_error_reason?(reason) && create_table_retry_count < create_table_retry_limit
|
101
62
|
sleep create_table_retry_wait
|
102
63
|
create_table_retry_wait *= 2
|
103
64
|
create_table_retry_count += 1
|
104
65
|
retry
|
105
66
|
else
|
106
|
-
raise UnRetryableError.new("failed to create table in bigquery", e)
|
67
|
+
raise Fluent::BigQuery::UnRetryableError.new("failed to create table in bigquery", e)
|
107
68
|
end
|
108
69
|
end
|
109
70
|
end
|
@@ -139,11 +100,7 @@ module Fluent
|
|
139
100
|
reason = e.respond_to?(:reason) ? e.reason : nil
|
140
101
|
log.error "tabledata.insertAll API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
|
141
102
|
|
142
|
-
|
143
|
-
raise RetryableError.new(nil, e)
|
144
|
-
else
|
145
|
-
raise UnRetryableError.new(nil, e)
|
146
|
-
end
|
103
|
+
raise Fluent::BigQuery::Error.wrap(e)
|
147
104
|
end
|
148
105
|
|
149
106
|
def create_load_job(project, dataset, table_id, upload_source, job_id, fields, ignore_unknown_values: false, max_bad_records: 0, timeout_sec: nil, open_timeout_sec: 60, auto_create_table: nil, time_partitioning_type: nil, time_partitioning_expiration: nil)
|
@@ -175,7 +132,7 @@ module Fluent
|
|
175
132
|
configuration[:configuration][:load].delete(:schema)
|
176
133
|
end
|
177
134
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError
|
178
|
-
raise UnRetryableError.new("Schema is empty") if fields.empty?
|
135
|
+
raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
|
179
136
|
end
|
180
137
|
|
181
138
|
res = client.insert_job(
|
@@ -205,11 +162,7 @@ module Fluent
|
|
205
162
|
|
206
163
|
return wait_load_job(project, dataset, job_id, table_id) if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
|
207
164
|
|
208
|
-
|
209
|
-
raise RetryableError.new(nil, e)
|
210
|
-
else
|
211
|
-
raise UnRetryableError.new(nil, e)
|
212
|
-
end
|
165
|
+
raise Fluent::BigQuery::Error.wrap(e)
|
213
166
|
end
|
214
167
|
|
215
168
|
def wait_load_job(project, dataset, job_id, table_id, retryable: true)
|
@@ -232,10 +185,10 @@ module Fluent
|
|
232
185
|
error_result = _response.status.error_result
|
233
186
|
if error_result
|
234
187
|
log.error "job.insert API (result)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: error_result.message, reason: error_result.reason
|
235
|
-
if retryable && RETRYABLE_ERROR_REASON.include?(error_result.reason)
|
236
|
-
raise RetryableError.new("failed to load into bigquery, retry")
|
188
|
+
if retryable && Fluent::BigQuery::Error.retryable_error_reason?(error_result.reason)
|
189
|
+
raise Fluent::BigQuery::RetryableError.new("failed to load into bigquery, retry")
|
237
190
|
else
|
238
|
-
raise UnRetryableError.new("failed to load into bigquery, and cannot retry")
|
191
|
+
raise Fluent::BigQuery::UnRetryableError.new("failed to load into bigquery, and cannot retry")
|
239
192
|
end
|
240
193
|
end
|
241
194
|
|
@@ -5,6 +5,7 @@ require 'fluent/plugin/bigquery/version'
|
|
5
5
|
require 'fluent/mixin/config_placeholders'
|
6
6
|
require 'fluent/mixin/plaintextformatter'
|
7
7
|
|
8
|
+
require 'fluent/plugin/bigquery/errors'
|
8
9
|
require 'fluent/plugin/bigquery/schema'
|
9
10
|
require 'fluent/plugin/bigquery/writer'
|
10
11
|
|
@@ -12,10 +13,6 @@ require 'fluent/plugin/bigquery/writer'
|
|
12
13
|
# require 'fluent/plugin/bigquery/load_request_body_wrapper'
|
13
14
|
|
14
15
|
module Fluent
|
15
|
-
### TODO: error classes for each api error responses
|
16
|
-
# class BigQueryAPIError < StandardError
|
17
|
-
# end
|
18
|
-
|
19
16
|
class BigQueryOutput < TimeSlicedOutput
|
20
17
|
Fluent::Plugin.register_output('bigquery', self)
|
21
18
|
|
@@ -414,7 +411,7 @@ module Fluent
|
|
414
411
|
|
415
412
|
def insert(table_id, rows, template_suffix)
|
416
413
|
writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix)
|
417
|
-
rescue Fluent::BigQuery::Writer::Error => e
|
414
|
+
rescue Fluent::BigQuery::Error => e
|
418
415
|
if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
|
419
416
|
# Table Not Found: Auto Create Table
|
420
417
|
writer.create_table(@project, @dataset, table_id, @fields, time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration)
|
@@ -467,7 +464,7 @@ module Fluent
|
|
467
464
|
time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration
|
468
465
|
})
|
469
466
|
end
|
470
|
-
rescue Fluent::BigQuery::Writer::Error => e
|
467
|
+
rescue Fluent::BigQuery::Error => e
|
471
468
|
if e.retryable?
|
472
469
|
raise e
|
473
470
|
elsif @secondary
|
@@ -923,10 +923,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
923
923
|
skip_invalid_rows: false,
|
924
924
|
ignore_unknown_values: false
|
925
925
|
}, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
|
926
|
-
ex = Google::Apis::ServerError.new("error")
|
927
|
-
def ex.reason
|
928
|
-
"backendError"
|
929
|
-
end
|
926
|
+
ex = Google::Apis::ServerError.new("error", status_code: 500)
|
930
927
|
raise ex
|
931
928
|
end
|
932
929
|
|
@@ -936,7 +933,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
936
933
|
end
|
937
934
|
|
938
935
|
driver.instance.start
|
939
|
-
assert_raise Fluent::BigQuery::Writer::RetryableError do
|
936
|
+
assert_raise Fluent::BigQuery::RetryableError do
|
940
937
|
driver.instance.write(chunk)
|
941
938
|
end
|
942
939
|
driver.instance.shutdown
|
@@ -971,7 +968,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
971
968
|
skip_invalid_rows: false,
|
972
969
|
ignore_unknown_values: false
|
973
970
|
}, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
|
974
|
-
ex = Google::Apis::ServerError.new("error")
|
971
|
+
ex = Google::Apis::ServerError.new("error", status_code: 501)
|
975
972
|
def ex.reason
|
976
973
|
"invalid"
|
977
974
|
end
|
@@ -1191,7 +1188,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
1191
1188
|
end
|
1192
1189
|
|
1193
1190
|
driver.instance.start
|
1194
|
-
assert_raise Fluent::BigQuery::Writer::RetryableError do
|
1191
|
+
assert_raise Fluent::BigQuery::RetryableError do
|
1195
1192
|
driver.instance.write(chunk)
|
1196
1193
|
end
|
1197
1194
|
driver.instance.shutdown
|
@@ -1427,7 +1424,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
1427
1424
|
mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], hash_including(
|
1428
1425
|
skip_invalid_rows: false,
|
1429
1426
|
ignore_unknown_values: false,
|
1430
|
-
)) { raise Fluent::BigQuery::
|
1427
|
+
)) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
|
1431
1428
|
mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: nil, time_partitioning_expiration: nil)
|
1432
1429
|
|
1433
1430
|
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
@@ -1489,7 +1486,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
1489
1486
|
mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], hash_including(
|
1490
1487
|
skip_invalid_rows: false,
|
1491
1488
|
ignore_unknown_values: false,
|
1492
|
-
)) { raise Fluent::BigQuery::
|
1489
|
+
)) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
|
1493
1490
|
mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: :day, time_partitioning_expiration: 3600)
|
1494
1491
|
|
1495
1492
|
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naoya Ito
|
8
|
+
- joker1007
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2016-11-
|
12
|
+
date: 2016-11-15 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: rake
|
@@ -188,6 +189,7 @@ description: Fluentd plugin to store data on Google BigQuery, by load, or by str
|
|
188
189
|
inserts
|
189
190
|
email:
|
190
191
|
- i.naoya@gmail.com
|
192
|
+
- kakyoin.hierophant@gmail.com
|
191
193
|
executables: []
|
192
194
|
extensions: []
|
193
195
|
extra_rdoc_files: []
|
@@ -199,6 +201,7 @@ files:
|
|
199
201
|
- README.md
|
200
202
|
- Rakefile
|
201
203
|
- fluent-plugin-bigquery.gemspec
|
204
|
+
- lib/fluent/plugin/bigquery/errors.rb
|
202
205
|
- lib/fluent/plugin/bigquery/schema.rb
|
203
206
|
- lib/fluent/plugin/bigquery/version.rb
|
204
207
|
- lib/fluent/plugin/bigquery/writer.rb
|