fluent-plugin-bigquery 0.4.0 → 0.4.1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 75db2952171316995000122029bb4e4f3eeb0a45
+  data.tar.gz: 43d6cff7aaf69f06288f006d4f2bd97300b59dd9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0157b43c59d7ac17e50051a261cf21f036bc1c5827a6a102d4747d7f66293fb8e1f733bebcabcd652afdf2b90a9abccbf622763d3e7cc2d9b34a382d75c4adc3
+  data.tar.gz: 169d3bf4a140f4dc3e5fd77707f2ed9d45ed989b23c217555ea22ef8275a1fed36129169e7b523259d767f745cc892d31605393bc40af244448b7ca81f6d62d8
data/README.md CHANGED
@@ -17,6 +17,13 @@ OAuth flow for installed applications.
 ## Notice
 If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
 
+## With docker image
+If you use the official alpine-based fluentd docker image (https://github.com/fluent/fluentd-docker-image),
+you need to install the `bigdecimal` gem in your own Dockerfile.
+This is because the alpine-based image ships only a minimal ruby environment in order to reduce image size,
+and in most cases a dependency on an embedded gem is not declared in the gemspec,
+because an embedded gem dependency can unduly restrict the ruby environment.
+
 ## Configuration
 
 ### Options
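The added README section tells users to bake `bigdecimal` into a derived image. As a minimal sketch of that (the base image tag, `USER` names, and apk package list are illustrative assumptions, not part of this diff), a Dockerfile could look like:

```Dockerfile
FROM fluent/fluentd:latest
USER root
# the alpine base ships a minimal ruby without bigdecimal; build-base and
# ruby-dev are needed because bigdecimal compiles a native extension
RUN apk add --no-cache build-base ruby-dev \
 && gem install bigdecimal
USER fluent
```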
@@ -59,6 +66,7 @@ If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
 | replace_record_key_regexp{1-10} | string | no | nil | see examples. |
 | convert_hash_to_json (deprecated) | bool | no | false | If true, converts Hash value of record to JSON String. |
 | insert_id_field | string | no | nil | Use key as `insert_id` of Streaming Insert API parameter. |
+| allow_retry_insert_errors | bool | no | false | Retry inserting rows when an insertErrors response occurs. Rows may be inserted in duplicate. |
 | request_timeout_sec | integer | no | nil | BigQuery API response timeout. |
 | request_open_timeout_sec | integer | no | 60 | BigQuery API connection and request timeout. If you send big data to BigQuery, set a large value. |
 | time_partitioning_type | enum | no (either day) | nil | Type of the BigQuery time partitioning feature (experimental feature on BigQuery). |
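For illustration, a minimal `<match>` block enabling the new option might look like the following (project/dataset/table identifiers are placeholders and auth options are omitted; per the plugin comment further below, also set `insert_id_field` if you cannot tolerate the duplicate rows a retried insert may produce, and here a `uuid` record field is assumed to exist for that purpose):

```
<match dummy>
  type bigquery
  method insert

  project yourproject_id
  dataset yourdataset_id
  table   foo

  # dedupe key: without insert_id_field, retried rows can be duplicated
  insert_id_field uuid
  allow_retry_insert_errors true
</match>
```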
lib/fluent/plugin/bigquery/errors.rb CHANGED
@@ -3,6 +3,7 @@ module Fluent
   # @abstract
   class Error < StandardError
     RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
+    RETRYABLE_INSERT_ERRORS_REASON = %w(timeout).freeze
     RETRYABLE_STATUS_CODE = [500, 503]
 
     class << self
@@ -29,6 +30,10 @@ module Fluent
       RETRYABLE_ERROR_REASON.include?(reason)
     end
 
+    def retryable_insert_errors_reason?(reason)
+      RETRYABLE_INSERT_ERRORS_REASON.include?(reason)
+    end
+
     # Guard for instantiation
     private :new
     def inherited(subclass)
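The new predicate simply checks a reason string against the allow-list above, so its behavior is:

```ruby
require "fluent/plugin/bigquery/errors" # require path assumed from the gem layout

# "timeout" is the only reason listed in RETRYABLE_INSERT_ERRORS_REASON
Fluent::BigQuery::Error.retryable_insert_errors_reason?("timeout") # => true
# "stopped" (used in the tests below) is treated as non-retryable
Fluent::BigQuery::Error.retryable_insert_errors_reason?("stopped") # => false
```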
lib/fluent/plugin/bigquery/writer.rb CHANGED
@@ -84,7 +84,7 @@ module Fluent
       nil
     end
 
-    def insert_rows(project, dataset, table_id, rows, skip_invalid_rows: false, ignore_unknown_values: false, template_suffix: nil, timeout_sec: nil, open_timeout_sec: 60)
+    def insert_rows(project, dataset, table_id, rows, skip_invalid_rows: false, ignore_unknown_values: false, template_suffix: nil, timeout_sec: nil, open_timeout_sec: 60, allow_retry_insert_errors: false)
       body = {
         rows: rows,
         skip_invalid_rows: skip_invalid_rows,
@@ -95,7 +95,20 @@ module Fluent
         options: {timeout_sec: timeout_sec, open_timeout_sec: open_timeout_sec}
       })
       log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
-
+
+      if res.insert_errors && !res.insert_errors.empty?
+        log.warn "insert errors", project_id: project, dataset: dataset, table: table_id, insert_errors: res.insert_errors.to_s
+        if allow_retry_insert_errors
+          is_included_any_retryable_insert_error = res.insert_errors.any? do |insert_error|
+            insert_error.errors.any? { |error| Fluent::BigQuery::Error.retryable_insert_errors_reason?(error.reason) }
+          end
+          if is_included_any_retryable_insert_error
+            raise Fluent::BigQuery::RetryableError.new("failed to insert into bigquery(insert errors), retry")
+          else
+            raise Fluent::BigQuery::UnRetryableError.new("failed to insert into bigquery(insert errors), and cannot retry")
+          end
+        end
+      end
     rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
       @client = nil
@@ -166,7 +179,7 @@ module Fluent
     end
 
     if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
-      wait_load_job(chunk_id, project, dataset, job_id, table_id)
+      wait_load_job(chunk_id, project, dataset, job_id, table_id)
       @num_errors_per_chunk.delete(chunk_id)
       return
     end
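Taken together, the two raises above give callers of `insert_rows` a simple contract. A hedged sketch of the resulting control flow (variable names are placeholders; the `<secondary>` hand-off is inferred from the tests below, not stated in this diff):

```ruby
begin
  writer.insert_rows(project, dataset, table_id, rows, allow_retry_insert_errors: true)
rescue Fluent::BigQuery::RetryableError
  # at least one insertErrors reason was retryable (e.g. "timeout"):
  # fluentd keeps the buffer chunk and retries the flush later
rescue Fluent::BigQuery::UnRetryableError
  # only non-retryable reasons (e.g. "stopped"): the chunk is not retried
  # and can be routed to a <secondary> output instead
end
```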
lib/fluent/plugin/out_bigquery.rb CHANGED
@@ -121,6 +121,10 @@ module Fluent
 
     config_param :method, :enum, list: [:insert, :load], default: :insert, skip_accessor: true
 
+    # allow_retry_insert_errors (only insert)
+    # If insert_id_field is not specified, true means to allow duplicate rows
+    config_param :allow_retry_insert_errors, :bool, default: false
+
     # TODO
     # config_param :row_size_limit, :integer, default: 100*1000 # < 100KB # configurable in google ?
     # config_param :insert_size_limit, :integer, default: 1000**2 # < 1MB
@@ -423,7 +427,7 @@ module Fluent
     end
 
     def insert(table_id, rows, template_suffix)
-      writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix)
+      writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix, allow_retry_insert_errors: @allow_retry_insert_errors)
     rescue Fluent::BigQuery::Error => e
       if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
         # Table Not Found: Auto Create Table
test/plugin/test_out_bigquery.rb CHANGED
@@ -55,6 +55,21 @@ class BigQueryOutputTest < Test::Unit::TestCase
     writer
   end
 
+  # ref. https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/ea2be47beb32615b2bf69f8a846a684f86c8328c/google-cloud-bigquery/test/google/cloud/bigquery/table_insert_test.rb#L141
+  def failure_insert_errors(reason, error_count, insert_error_count)
+    error = Google::Apis::BigqueryV2::ErrorProto.new(
+      reason: reason
+    )
+    insert_error = Google::Apis::BigqueryV2::InsertAllTableDataResponse::InsertError.new(
+      errors: [].fill(error, 0, error_count)
+    )
+
+    res = Google::Apis::BigqueryV2::InsertAllTableDataResponse.new(
+      insert_errors: [].fill(insert_error, 0, insert_error_count)
+    )
+    return res
+  end
+
   def test_configure_table
     driver = create_driver
     assert_equal driver.instance.table, 'foo'
@@ -886,6 +901,143 @@ class BigQueryOutputTest < Test::Unit::TestCase
     driver.instance.shutdown
   end
 
+  def test_write_with_retryable_insert_errors
+    data_input = [
+      { "error_count" => 1, "insert_error_count" => 1 },
+      { "error_count" => 10, "insert_error_count" => 1 },
+      { "error_count" => 10, "insert_error_count" => 10 },
+    ]
+
+    data_input.each do |d|
+      entry = {json: {a: "b"}}, {json: {b: "c"}}
+      allow_retry_insert_errors = true
+      driver = create_driver(<<-CONFIG)
+        table foo
+        email foo@bar.example
+        private_key_path /path/to/key
+        project yourproject_id
+        dataset yourdataset_id
+
+        allow_retry_insert_errors #{allow_retry_insert_errors}
+
+        time_format %s
+        time_field time
+
+        schema [
+          {"name": "time", "type": "INTEGER"},
+          {"name": "status", "type": "INTEGER"},
+          {"name": "bytes", "type": "INTEGER"},
+          {"name": "vhost", "type": "STRING"},
+          {"name": "path", "type": "STRING"},
+          {"name": "method", "type": "STRING"},
+          {"name": "protocol", "type": "STRING"},
+          {"name": "agent", "type": "STRING"},
+          {"name": "referer", "type": "STRING"},
+          {"name": "remote", "type": "RECORD", "fields": [
+            {"name": "host", "type": "STRING"},
+            {"name": "ip", "type": "STRING"},
+            {"name": "user", "type": "STRING"}
+          ]},
+          {"name": "requesttime", "type": "FLOAT"},
+          {"name": "bot_access", "type": "BOOLEAN"},
+          {"name": "loginsession", "type": "BOOLEAN"}
+        ]
+        <secondary>
+          type file
+          path error
+          utc
+        </secondary>
+      CONFIG
+
+      writer = stub_writer(driver)
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+        rows: entry,
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
+        s = failure_insert_errors("timeout", d["error_count"], d["insert_error_count"])
+        s
+      end
+
+      chunk = Fluent::MemoryBufferChunk.new("my.tag")
+      entry.each do |e|
+        chunk << e.to_msgpack
+      end
+
+      driver.instance.start
+      assert_raise Fluent::BigQuery::RetryableError do
+        driver.instance.write(chunk)
+      end
+      driver.instance.shutdown
+    end
+  end
+
+  def test_write_with_not_retryable_insert_errors
+    data_input = [
+      { "allow_retry_insert_errors" => false, "reason" => "timeout" },
+      { "allow_retry_insert_errors" => true, "reason" => "stopped" },
+    ]
+    data_input.each do |d|
+      entry = {json: {a: "b"}}, {json: {b: "c"}}
+      driver = create_driver(<<-CONFIG)
+        table foo
+        email foo@bar.example
+        private_key_path /path/to/key
+        project yourproject_id
+        dataset yourdataset_id
+
+        allow_retry_insert_errors #{d["allow_retry_insert_errors"]}
+
+        time_format %s
+        time_field time
+
+        schema [
+          {"name": "time", "type": "INTEGER"},
+          {"name": "status", "type": "INTEGER"},
+          {"name": "bytes", "type": "INTEGER"},
+          {"name": "vhost", "type": "STRING"},
+          {"name": "path", "type": "STRING"},
+          {"name": "method", "type": "STRING"},
+          {"name": "protocol", "type": "STRING"},
+          {"name": "agent", "type": "STRING"},
+          {"name": "referer", "type": "STRING"},
+          {"name": "remote", "type": "RECORD", "fields": [
+            {"name": "host", "type": "STRING"},
+            {"name": "ip", "type": "STRING"},
+            {"name": "user", "type": "STRING"}
+          ]},
+          {"name": "requesttime", "type": "FLOAT"},
+          {"name": "bot_access", "type": "BOOLEAN"},
+          {"name": "loginsession", "type": "BOOLEAN"}
+        ]
+        <secondary>
+          type file
+          path error
+          utc
+        </secondary>
+      CONFIG
+
+      writer = stub_writer(driver)
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+        rows: entry,
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
+        s = failure_insert_errors(d["reason"], 1, 1)
+        s
+      end
+
+      chunk = Fluent::MemoryBufferChunk.new("my.tag")
+      entry.each do |e|
+        chunk << e.to_msgpack
+      end
+
+      driver.instance.start
+      driver.instance.write(chunk)
+      driver.instance.shutdown
+    end
+  end
+
   def test_write_for_load
     schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
     entry = {a: "b"}, {b: "c"}
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.4.1
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-
+date: 2017-03-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake