fluent-plugin-bigquery 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/fluent-plugin-bigquery.gemspec +2 -2
- data/lib/fluent/plugin/bigquery/errors.rb +83 -0
- data/lib/fluent/plugin/bigquery/version.rb +1 -2
- data/lib/fluent/plugin/bigquery/writer.rb +8 -55
- data/lib/fluent/plugin/out_bigquery.rb +3 -6
- data/test/plugin/test_out_bigquery.rb +6 -9
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 66cdf151a1711b82f972e108d736638c017637ca
|
4
|
+
data.tar.gz: 29de9f545e896319d26105753a8569a60d411441
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e413ccab59cb232dae2b2808bb4637df291eb96a0540a98387aff8397a0fb39b0f7585c6c0d565d6f2d5851b516737e7308deb9c11572a303af55832f49cd13
|
7
|
+
data.tar.gz: f7fdfa22e82ff9f5748782e8b62429c005467bfec9ff3ea0133b2cd53f71359d90789275d42859b96f2d583830a1a6160916a0452ad9efa5a57c559fe07af366
|
@@ -6,8 +6,8 @@ require 'fluent/plugin/bigquery/version'
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "fluent-plugin-bigquery"
|
8
8
|
spec.version = Fluent::BigQueryPlugin::VERSION
|
9
|
-
spec.authors = ["Naoya Ito"]
|
10
|
-
spec.email = ["i.naoya@gmail.com"]
|
9
|
+
spec.authors = ["Naoya Ito", "joker1007"]
|
10
|
+
spec.email = ["i.naoya@gmail.com", "kakyoin.hierophant@gmail.com"]
|
11
11
|
spec.description = %q{Fluentd plugin to store data on Google BigQuery, by load, or by stream inserts}
|
12
12
|
spec.summary = %q{Fluentd plugin to store data on Google BigQuery}
|
13
13
|
spec.homepage = "https://github.com/kaizenplatform/fluent-plugin-bigquery"
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module Fluent
|
2
|
+
module BigQuery
|
3
|
+
# @abstract
|
4
|
+
class Error < StandardError
|
5
|
+
RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
|
6
|
+
RETRYABLE_STATUS_CODE = [500, 503]
|
7
|
+
|
8
|
+
class << self
|
9
|
+
def wrap(google_api_error, message = nil, force_unretryable: false)
|
10
|
+
e = google_api_error
|
11
|
+
return UnRetryableError.new(message, e) if force_unretryable
|
12
|
+
|
13
|
+
if retryable_error?(e)
|
14
|
+
RetryableError.new(message, e)
|
15
|
+
else
|
16
|
+
UnRetryableError.new(message, e)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def retryable_error?(google_api_error)
|
21
|
+
e = google_api_error
|
22
|
+
reason = e.respond_to?(:reason) ? e.reason : nil
|
23
|
+
|
24
|
+
retryable_error_reason?(reason) ||
|
25
|
+
(e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code))
|
26
|
+
end
|
27
|
+
|
28
|
+
def retryable_error_reason?(reason)
|
29
|
+
RETRYABLE_ERROR_REASON.include?(reason)
|
30
|
+
end
|
31
|
+
|
32
|
+
# Guard for instantiation
|
33
|
+
private :new
|
34
|
+
def inherited(subclass)
|
35
|
+
subclass.class_eval do
|
36
|
+
class << self
|
37
|
+
public :new
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
attr_reader :origin
|
44
|
+
|
45
|
+
def initialize(message, origin = nil)
|
46
|
+
@origin = origin
|
47
|
+
super(message || origin.message)
|
48
|
+
end
|
49
|
+
|
50
|
+
def method_missing(name, *args)
|
51
|
+
if @origin
|
52
|
+
@origin.send(name, *args)
|
53
|
+
else
|
54
|
+
super
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def reason
|
59
|
+
@origin && @origin.respond_to?(:reason) ? @origin.reason : nil
|
60
|
+
end
|
61
|
+
|
62
|
+
def status_code
|
63
|
+
@origin && @origin.respond_to?(:status_code) ? @origin.status_code : nil
|
64
|
+
end
|
65
|
+
|
66
|
+
def body
|
67
|
+
@origin && @origin.respond_to?(:body) ? @origin.body : nil
|
68
|
+
end
|
69
|
+
|
70
|
+
def retryable?
|
71
|
+
false
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
class UnRetryableError < Error; end
|
76
|
+
|
77
|
+
class RetryableError < Error
|
78
|
+
def retryable?
|
79
|
+
true
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -1,45 +1,6 @@
|
|
1
1
|
module Fluent
|
2
2
|
module BigQuery
|
3
3
|
class Writer
|
4
|
-
RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
|
5
|
-
|
6
|
-
class Error < StandardError
|
7
|
-
attr_reader :origin
|
8
|
-
|
9
|
-
def initialize(message, origin = nil)
|
10
|
-
@origin = origin
|
11
|
-
super(message || origin.message)
|
12
|
-
end
|
13
|
-
|
14
|
-
def method_missing(name, *args)
|
15
|
-
if @origin
|
16
|
-
@origin.send(name, *args)
|
17
|
-
else
|
18
|
-
super
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
def reason
|
23
|
-
@origin && @origin.respond_to?(:reason) ? @origin.reason : nil
|
24
|
-
end
|
25
|
-
|
26
|
-
def status_code
|
27
|
-
@origin && @origin.respond_to?(:status_code) ? @origin.status_code : nil
|
28
|
-
end
|
29
|
-
|
30
|
-
def retryable?
|
31
|
-
false
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
class UnRetryableError < Error; end
|
36
|
-
|
37
|
-
class RetryableError < Error
|
38
|
-
def retryable?
|
39
|
-
true
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
4
|
def initialize(log, auth_method, auth_options = {})
|
44
5
|
@auth_method = auth_method
|
45
6
|
@scope = "https://www.googleapis.com/auth/bigquery"
|
@@ -97,13 +58,13 @@ module Fluent
|
|
97
58
|
reason = e.respond_to?(:reason) ? e.reason : nil
|
98
59
|
log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message, reason: reason
|
99
60
|
|
100
|
-
if RETRYABLE_ERROR_REASON.include?(reason) && create_table_retry_count < create_table_retry_limit
|
61
|
+
if Fluent::BigQuery::Error.retryable_error_reason?(reason) && create_table_retry_count < create_table_retry_limit
|
101
62
|
sleep create_table_retry_wait
|
102
63
|
create_table_retry_wait *= 2
|
103
64
|
create_table_retry_count += 1
|
104
65
|
retry
|
105
66
|
else
|
106
|
-
raise UnRetryableError.new("failed to create table in bigquery", e)
|
67
|
+
raise Fluent::BigQuery::UnRetryableError.new("failed to create table in bigquery", e)
|
107
68
|
end
|
108
69
|
end
|
109
70
|
end
|
@@ -139,11 +100,7 @@ module Fluent
|
|
139
100
|
reason = e.respond_to?(:reason) ? e.reason : nil
|
140
101
|
log.error "tabledata.insertAll API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
|
141
102
|
|
142
|
-
|
143
|
-
raise RetryableError.new(nil, e)
|
144
|
-
else
|
145
|
-
raise UnRetryableError.new(nil, e)
|
146
|
-
end
|
103
|
+
raise Fluent::BigQuery::Error.wrap(e)
|
147
104
|
end
|
148
105
|
|
149
106
|
def create_load_job(project, dataset, table_id, upload_source, job_id, fields, ignore_unknown_values: false, max_bad_records: 0, timeout_sec: nil, open_timeout_sec: 60, auto_create_table: nil, time_partitioning_type: nil, time_partitioning_expiration: nil)
|
@@ -175,7 +132,7 @@ module Fluent
|
|
175
132
|
configuration[:configuration][:load].delete(:schema)
|
176
133
|
end
|
177
134
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError
|
178
|
-
raise UnRetryableError.new("Schema is empty") if fields.empty?
|
135
|
+
raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
|
179
136
|
end
|
180
137
|
|
181
138
|
res = client.insert_job(
|
@@ -205,11 +162,7 @@ module Fluent
|
|
205
162
|
|
206
163
|
return wait_load_job(project, dataset, job_id, table_id) if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
|
207
164
|
|
208
|
-
|
209
|
-
raise RetryableError.new(nil, e)
|
210
|
-
else
|
211
|
-
raise UnRetryableError.new(nil, e)
|
212
|
-
end
|
165
|
+
raise Fluent::BigQuery::Error.wrap(e)
|
213
166
|
end
|
214
167
|
|
215
168
|
def wait_load_job(project, dataset, job_id, table_id, retryable: true)
|
@@ -232,10 +185,10 @@ module Fluent
|
|
232
185
|
error_result = _response.status.error_result
|
233
186
|
if error_result
|
234
187
|
log.error "job.insert API (result)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: error_result.message, reason: error_result.reason
|
235
|
-
if retryable && RETRYABLE_ERROR_REASON.include?(error_result.reason)
|
236
|
-
raise RetryableError.new("failed to load into bigquery, retry")
|
188
|
+
if retryable && Fluent::BigQuery::Error.retryable_error_reason?(error_result.reason)
|
189
|
+
raise Fluent::BigQuery::RetryableError.new("failed to load into bigquery, retry")
|
237
190
|
else
|
238
|
-
raise UnRetryableError.new("failed to load into bigquery, and cannot retry")
|
191
|
+
raise Fluent::BigQuery::UnRetryableError.new("failed to load into bigquery, and cannot retry")
|
239
192
|
end
|
240
193
|
end
|
241
194
|
|
@@ -5,6 +5,7 @@ require 'fluent/plugin/bigquery/version'
|
|
5
5
|
require 'fluent/mixin/config_placeholders'
|
6
6
|
require 'fluent/mixin/plaintextformatter'
|
7
7
|
|
8
|
+
require 'fluent/plugin/bigquery/errors'
|
8
9
|
require 'fluent/plugin/bigquery/schema'
|
9
10
|
require 'fluent/plugin/bigquery/writer'
|
10
11
|
|
@@ -12,10 +13,6 @@ require 'fluent/plugin/bigquery/writer'
|
|
12
13
|
# require 'fluent/plugin/bigquery/load_request_body_wrapper'
|
13
14
|
|
14
15
|
module Fluent
|
15
|
-
### TODO: error classes for each api error responses
|
16
|
-
# class BigQueryAPIError < StandardError
|
17
|
-
# end
|
18
|
-
|
19
16
|
class BigQueryOutput < TimeSlicedOutput
|
20
17
|
Fluent::Plugin.register_output('bigquery', self)
|
21
18
|
|
@@ -414,7 +411,7 @@ module Fluent
|
|
414
411
|
|
415
412
|
def insert(table_id, rows, template_suffix)
|
416
413
|
writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix)
|
417
|
-
rescue Fluent::BigQuery::Writer::Error => e
|
414
|
+
rescue Fluent::BigQuery::Error => e
|
418
415
|
if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
|
419
416
|
# Table Not Found: Auto Create Table
|
420
417
|
writer.create_table(@project, @dataset, table_id, @fields, time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration)
|
@@ -467,7 +464,7 @@ module Fluent
|
|
467
464
|
time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration
|
468
465
|
})
|
469
466
|
end
|
470
|
-
rescue Fluent::BigQuery::Writer::Error => e
|
467
|
+
rescue Fluent::BigQuery::Error => e
|
471
468
|
if e.retryable?
|
472
469
|
raise e
|
473
470
|
elsif @secondary
|
@@ -923,10 +923,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
923
923
|
skip_invalid_rows: false,
|
924
924
|
ignore_unknown_values: false
|
925
925
|
}, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
|
926
|
-
ex = Google::Apis::ServerError.new("error")
|
927
|
-
def ex.reason
|
928
|
-
"backendError"
|
929
|
-
end
|
926
|
+
ex = Google::Apis::ServerError.new("error", status_code: 500)
|
930
927
|
raise ex
|
931
928
|
end
|
932
929
|
|
@@ -936,7 +933,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
936
933
|
end
|
937
934
|
|
938
935
|
driver.instance.start
|
939
|
-
assert_raise Fluent::BigQuery::Writer::RetryableError do
|
936
|
+
assert_raise Fluent::BigQuery::RetryableError do
|
940
937
|
driver.instance.write(chunk)
|
941
938
|
end
|
942
939
|
driver.instance.shutdown
|
@@ -971,7 +968,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
971
968
|
skip_invalid_rows: false,
|
972
969
|
ignore_unknown_values: false
|
973
970
|
}, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
|
974
|
-
ex = Google::Apis::ServerError.new("error")
|
971
|
+
ex = Google::Apis::ServerError.new("error", status_code: 501)
|
975
972
|
def ex.reason
|
976
973
|
"invalid"
|
977
974
|
end
|
@@ -1191,7 +1188,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
1191
1188
|
end
|
1192
1189
|
|
1193
1190
|
driver.instance.start
|
1194
|
-
assert_raise Fluent::BigQuery::Writer::RetryableError do
|
1191
|
+
assert_raise Fluent::BigQuery::RetryableError do
|
1195
1192
|
driver.instance.write(chunk)
|
1196
1193
|
end
|
1197
1194
|
driver.instance.shutdown
|
@@ -1427,7 +1424,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
1427
1424
|
mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], hash_including(
|
1428
1425
|
skip_invalid_rows: false,
|
1429
1426
|
ignore_unknown_values: false,
|
1430
|
-
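)) { raise Fluent::BigQuery::Writer::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }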
|
1427
|
+
)) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
|
1431
1428
|
mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: nil, time_partitioning_expiration: nil)
|
1432
1429
|
|
1433
1430
|
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
@@ -1489,7 +1486,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
1489
1486
|
mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], hash_including(
|
1490
1487
|
skip_invalid_rows: false,
|
1491
1488
|
ignore_unknown_values: false,
|
1492
|
-
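)) { raise Fluent::BigQuery::Writer::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }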
|
1489
|
+
)) { raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
|
1493
1490
|
mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields), time_partitioning_type: :day, time_partitioning_expiration: 3600)
|
1494
1491
|
|
1495
1492
|
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naoya Ito
|
8
|
+
- joker1007
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2016-11-
|
12
|
+
date: 2016-11-15 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: rake
|
@@ -188,6 +189,7 @@ description: Fluentd plugin to store data on Google BigQuery, by load, or by str
|
|
188
189
|
inserts
|
189
190
|
email:
|
190
191
|
- i.naoya@gmail.com
|
192
|
+
- kakyoin.hierophant@gmail.com
|
191
193
|
executables: []
|
192
194
|
extensions: []
|
193
195
|
extra_rdoc_files: []
|
@@ -199,6 +201,7 @@ files:
|
|
199
201
|
- README.md
|
200
202
|
- Rakefile
|
201
203
|
- fluent-plugin-bigquery.gemspec
|
204
|
+
- lib/fluent/plugin/bigquery/errors.rb
|
202
205
|
- lib/fluent/plugin/bigquery/schema.rb
|
203
206
|
- lib/fluent/plugin/bigquery/version.rb
|
204
207
|
- lib/fluent/plugin/bigquery/writer.rb
|