fluent-plugin-bigquery 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 75db2952171316995000122029bb4e4f3eeb0a45
+  data.tar.gz: 43d6cff7aaf69f06288f006d4f2bd97300b59dd9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0157b43c59d7ac17e50051a261cf21f036bc1c5827a6a102d4747d7f66293fb8e1f733bebcabcd652afdf2b90a9abccbf622763d3e7cc2d9b34a382d75c4adc3
+  data.tar.gz: 169d3bf4a140f4dc3e5fd77707f2ed9d45ed989b23c217555ea22ef8275a1fed36129169e7b523259d767f745cc892d31605393bc40af244448b7ca81f6d62d8
data/README.md
CHANGED
@@ -17,6 +17,13 @@ OAuth flow for installed applications.
 ## Notice
 If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
 
+## With docker image
+If you use the official Alpine-based fluentd docker image (https://github.com/fluent/fluentd-docker-image),
+you need to install the `bigdecimal` gem in your own Dockerfile.
+The Alpine-based image ships only a minimal Ruby environment in order to keep the image small,
+and in most cases a dependency on such an embedded gem is not declared in a gemspec,
+because an embedded-gem dependency can restrict the Ruby environments a gem supports.
+
 ## Configuration
 
 ### Options
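The new README section says to install the gem yourself but stops short of showing the step. Below is a minimal Dockerfile sketch; the base tag and the Alpine build packages are illustrative assumptions (bigdecimal compiles a C extension, so the slim image needs a toolchain), not part of this release:

FROM fluent/fluentd:latest
# bigdecimal ships a C extension, so the Alpine image needs build tools to compile it
RUN apk add --no-cache --virtual .build-deps build-base ruby-dev \
 && gem install bigdecimal --no-document \
 && gem install fluent-plugin-bigquery --no-document \
 && apk del .build-deps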
@@ -59,6 +66,7 @@ If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
 | replace_record_key_regexp{1-10} | string | no | nil | see examples. |
 | convert_hash_to_json (deprecated) | bool | no | false | If true, converts Hash value of record to JSON String. |
 | insert_id_field | string | no | nil | Use key as `insert_id` of Streaming Insert API parameter. |
+| allow_retry_insert_errors | bool | no | false | Retry inserting rows when an insertErrors response occurs. Note that retried rows may be inserted in duplicate. |
 | request_timeout_sec | integer | no | nil | Bigquery API response timeout |
 | request_open_timeout_sec | integer | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
 | time_partitioning_type | enum | no (either day) | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
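The new row reads together with insert_id_field three rows up: enabling allow_retry_insert_errors without an insert id means a retried chunk can insert the same rows twice. A configuration sketch pairing the two (auth and schema values are placeholders in the style of this plugin's test configs; `uuid` is a hypothetical record key):

<match dummy>
  type bigquery
  method insert

  table foo
  email foo@bar.example
  private_key_path /path/to/key
  project yourproject_id
  dataset yourdataset_id

  # retry the chunk when BigQuery reports a retryable insertErrors reason
  allow_retry_insert_errors true
  # let BigQuery deduplicate retried rows via the Streaming Insert insert_id
  insert_id_field uuid

  time_format %s
  time_field time
  schema_path /path/to/schema.json
</match>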
@@ -3,6 +3,7 @@ module Fluent
     # @abstract
     class Error < StandardError
       RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
+      RETRYABLE_INSERT_ERRORS_REASON = %w(timeout).freeze
       RETRYABLE_STATUS_CODE = [500, 503]
 
       class << self
@@ -29,6 +30,10 @@ module Fluent
           RETRYABLE_ERROR_REASON.include?(reason)
         end
 
+        def retryable_insert_errors_reason?(reason)
+          RETRYABLE_INSERT_ERRORS_REASON.include?(reason)
+        end
+
        # Guard for instantiation
        private :new
        def inherited(subclass)
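This one-word list is the whole retry policy for insertErrors: a reason qualifies only if it appears in RETRYABLE_INSERT_ERRORS_REASON. A quick sketch of the new predicate ("stopped" is the non-retryable reason exercised by the tests further down):

# "timeout" is the only entry in RETRYABLE_INSERT_ERRORS_REASON in this release
Fluent::BigQuery::Error.retryable_insert_errors_reason?("timeout") # => true
Fluent::BigQuery::Error.retryable_insert_errors_reason?("stopped") # => false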
@@ -84,7 +84,7 @@ module Fluent
         nil
       end
 
-      def insert_rows(project, dataset, table_id, rows, skip_invalid_rows: false, ignore_unknown_values: false, template_suffix: nil, timeout_sec: nil, open_timeout_sec: 60)
+      def insert_rows(project, dataset, table_id, rows, skip_invalid_rows: false, ignore_unknown_values: false, template_suffix: nil, timeout_sec: nil, open_timeout_sec: 60, allow_retry_insert_errors: false)
         body = {
           rows: rows,
           skip_invalid_rows: skip_invalid_rows,
@@ -95,7 +95,20 @@ module Fluent
           options: {timeout_sec: timeout_sec, open_timeout_sec: open_timeout_sec}
         })
         log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
-
+
+        if res.insert_errors && !res.insert_errors.empty?
+          log.warn "insert errors", project_id: project, dataset: dataset, table: table_id, insert_errors: res.insert_errors.to_s
+          if allow_retry_insert_errors
+            is_included_any_retryable_insert_error = res.insert_errors.any? do |insert_error|
+              insert_error.errors.any? { |error| Fluent::BigQuery::Error.retryable_insert_errors_reason?(error.reason) }
+            end
+            if is_included_any_retryable_insert_error
+              raise Fluent::BigQuery::RetryableError.new("failed to insert into bigquery(insert errors), retry")
+            else
+              raise Fluent::BigQuery::UnRetryableError.new("failed to insert into bigquery(insert errors), and cannot retry")
+            end
+          end
+        end
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
         @client = nil
 
@@ -166,7 +179,7 @@ module Fluent
         end
 
         if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
-           wait_load_job(chunk_id, project, dataset, job_id, table_id)
+          wait_load_job(chunk_id, project, dataset, job_id, table_id)
           @num_errors_per_chunk.delete(chunk_id)
           return
         end
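Condensed, the new branch in insert_rows makes a three-way decision. A sketch with a hypothetical helper name (not the plugin's code); per the tests below, a raised RetryableError makes fluentd retry the chunk, while an UnRetryableError lets a configured <secondary> take it instead of re-queueing:

# Hypothetical summary of the logic added above, for reading convenience.
def classify_insert_errors(insert_errors, allow_retry_insert_errors)
  # With the option off, insert errors are only logged; the chunk is considered written.
  return :log_only unless allow_retry_insert_errors
  retryable = insert_errors.any? do |insert_error|
    insert_error.errors.any? { |error| Fluent::BigQuery::Error.retryable_insert_errors_reason?(error.reason) }
  end
  retryable ? :raise_retryable : :raise_unretryable
end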
@@ -121,6 +121,10 @@ module Fluent
 
     config_param :method, :enum, list: [:insert, :load], default: :insert, skip_accessor: true
 
+    # allow_retry_insert_errors (only insert)
+    # If insert_id_field is not specified, true means to allow duplicate rows
+    config_param :allow_retry_insert_errors, :bool, default: false
+
     # TODO
     # config_param :row_size_limit, :integer, default: 100*1000 # < 100KB # configurable in google ?
     # config_param :insert_size_limit, :integer, default: 1000**2 # < 1MB
@@ -423,7 +427,7 @@ module Fluent
     end
 
     def insert(table_id, rows, template_suffix)
-      writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix)
+      writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix, allow_retry_insert_errors: @allow_retry_insert_errors)
     rescue Fluent::BigQuery::Error => e
       if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
         # Table Not Found: Auto Create Table
@@ -55,6 +55,21 @@ class BigQueryOutputTest < Test::Unit::TestCase
     writer
   end
 
+  # ref. https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/ea2be47beb32615b2bf69f8a846a684f86c8328c/google-cloud-bigquery/test/google/cloud/bigquery/table_insert_test.rb#L141
+  def failure_insert_errors(reason, error_count, insert_error_count)
+    error = Google::Apis::BigqueryV2::ErrorProto.new(
+      reason: reason
+    )
+    insert_error = Google::Apis::BigqueryV2::InsertAllTableDataResponse::InsertError.new(
+      errors: [].fill(error, 0, error_count)
+    )
+
+    res = Google::Apis::BigqueryV2::InsertAllTableDataResponse.new(
+      insert_errors: [].fill(insert_error, 0, insert_error_count)
+    )
+    return res
+  end
+
   def test_configure_table
     driver = create_driver
     assert_equal driver.instance.table, 'foo'
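For orientation, what this helper returns, following the google-api-client classes it instantiates: a response carrying insert_error_count copies of one InsertError, each holding error_count copies of the same ErrorProto.

res = failure_insert_errors("timeout", 2, 3)
res.insert_errors.size                      # => 3
res.insert_errors.first.errors.size         # => 2
res.insert_errors.first.errors.first.reason # => "timeout"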
@@ -886,6 +901,143 @@ class BigQueryOutputTest < Test::Unit::TestCase
     driver.instance.shutdown
   end
 
+  def test_write_with_retryable_insert_errors
+    data_input = [
+      { "error_count" => 1, "insert_error_count" => 1 },
+      { "error_count" => 10, "insert_error_count" => 1 },
+      { "error_count" => 10, "insert_error_count" => 10 },
+    ]
+
+    data_input.each do |d|
+      entry = {json: {a: "b"}}, {json: {b: "c"}}
+      allow_retry_insert_errors = true
+      driver = create_driver(<<-CONFIG)
+        table foo
+        email foo@bar.example
+        private_key_path /path/to/key
+        project yourproject_id
+        dataset yourdataset_id
+
+        allow_retry_insert_errors #{allow_retry_insert_errors}
+
+        time_format %s
+        time_field time
+
+        schema [
+          {"name": "time", "type": "INTEGER"},
+          {"name": "status", "type": "INTEGER"},
+          {"name": "bytes", "type": "INTEGER"},
+          {"name": "vhost", "type": "STRING"},
+          {"name": "path", "type": "STRING"},
+          {"name": "method", "type": "STRING"},
+          {"name": "protocol", "type": "STRING"},
+          {"name": "agent", "type": "STRING"},
+          {"name": "referer", "type": "STRING"},
+          {"name": "remote", "type": "RECORD", "fields": [
+            {"name": "host", "type": "STRING"},
+            {"name": "ip", "type": "STRING"},
+            {"name": "user", "type": "STRING"}
+          ]},
+          {"name": "requesttime", "type": "FLOAT"},
+          {"name": "bot_access", "type": "BOOLEAN"},
+          {"name": "loginsession", "type": "BOOLEAN"}
+        ]
+        <secondary>
+          type file
+          path error
+          utc
+        </secondary>
+      CONFIG
+
+      writer = stub_writer(driver)
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+        rows: entry,
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
+        s = failure_insert_errors("timeout", d["error_count"], d["insert_error_count"])
+        s
+      end
+
+      chunk = Fluent::MemoryBufferChunk.new("my.tag")
+      entry.each do |e|
+        chunk << e.to_msgpack
+      end
+
+      driver.instance.start
+      assert_raise Fluent::BigQuery::RetryableError do
+        driver.instance.write(chunk)
+      end
+      driver.instance.shutdown
+    end
+  end
+
+  def test_write_with_not_retryable_insert_errors
+    data_input = [
+      { "allow_retry_insert_errors" => false, "reason" => "timeout" },
+      { "allow_retry_insert_errors" => true, "reason" => "stopped" },
+    ]
+    data_input.each do |d|
+      entry = {json: {a: "b"}}, {json: {b: "c"}}
+      driver = create_driver(<<-CONFIG)
+        table foo
+        email foo@bar.example
+        private_key_path /path/to/key
+        project yourproject_id
+        dataset yourdataset_id
+
+        allow_retry_insert_errors #{d["allow_retry_insert_errors"]}
+
+        time_format %s
+        time_field time
+
+        schema [
+          {"name": "time", "type": "INTEGER"},
+          {"name": "status", "type": "INTEGER"},
+          {"name": "bytes", "type": "INTEGER"},
+          {"name": "vhost", "type": "STRING"},
+          {"name": "path", "type": "STRING"},
+          {"name": "method", "type": "STRING"},
+          {"name": "protocol", "type": "STRING"},
+          {"name": "agent", "type": "STRING"},
+          {"name": "referer", "type": "STRING"},
+          {"name": "remote", "type": "RECORD", "fields": [
+            {"name": "host", "type": "STRING"},
+            {"name": "ip", "type": "STRING"},
+            {"name": "user", "type": "STRING"}
+          ]},
+          {"name": "requesttime", "type": "FLOAT"},
+          {"name": "bot_access", "type": "BOOLEAN"},
+          {"name": "loginsession", "type": "BOOLEAN"}
+        ]
+        <secondary>
+          type file
+          path error
+          utc
+        </secondary>
+      CONFIG
+
+      writer = stub_writer(driver)
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+        rows: entry,
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
+        s = failure_insert_errors(d["reason"], 1, 1)
+        s
+      end
+
+      chunk = Fluent::MemoryBufferChunk.new("my.tag")
+      entry.each do |e|
+        chunk << e.to_msgpack
+      end
+
+      driver.instance.start
+      driver.instance.write(chunk)
+      driver.instance.shutdown
+    end
+  end
+
   def test_write_for_load
     schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
     entry = {a: "b"}, {b: "c"}
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.4.1
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-
+date: 2017-03-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake