fluent-plugin-bigquery 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7d4074dc903c423acbebd56b2b4d6fc0ce110510
4
- data.tar.gz: 4d17cd1b2ee3768b83845105b5b9a714835e0a4c
3
+ metadata.gz: 75db2952171316995000122029bb4e4f3eeb0a45
4
+ data.tar.gz: 43d6cff7aaf69f06288f006d4f2bd97300b59dd9
5
5
  SHA512:
6
- metadata.gz: 7f99c64e394650b7eac03e6872dcfafb36981f48a726d8aba9d87fc83b45329ebac925c7d1239113995597e4697d8afcf1f9397c8583d0a3bbe11d47aedd668b
7
- data.tar.gz: 52554bcd622e75486fc8a10ceeebd8af958ac5523869f2ae964324c1348b734fcef00c4232766484a0d3112c15b50eb06f334b6efaf2cd55394321139bc1df9e
6
+ metadata.gz: 0157b43c59d7ac17e50051a261cf21f036bc1c5827a6a102d4747d7f66293fb8e1f733bebcabcd652afdf2b90a9abccbf622763d3e7cc2d9b34a382d75c4adc3
7
+ data.tar.gz: 169d3bf4a140f4dc3e5fd77707f2ed9d45ed989b23c217555ea22ef8275a1fed36129169e7b523259d767f745cc892d31605393bc40af244448b7ca81f6d62d8
data/README.md CHANGED
@@ -17,6 +17,13 @@ OAuth flow for installed applications.
17
17
  ## Notice
18
18
  If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
19
19
 
20
+ ## With docker image
21
+ If you use the official alpine-based fluentd docker image (https://github.com/fluent/fluentd-docker-image),
22
+ you need to install the `bigdecimal` gem in your own Dockerfile,
23
+ because the alpine-based image ships only a minimal Ruby environment in order to reduce image size,
24
+ and in most cases the dependency on an embedded gem is not declared in the gemspec,
25
+ because an embedded gem dependency sometimes restricts the Ruby environment.
26
+
20
27
  ## Configuration
21
28
 
22
29
  ### Options
@@ -59,6 +66,7 @@ If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
59
66
  | replace_record_key_regexp{1-10} | string | no | nil | see examples. |
60
67
  | convert_hash_to_json (deprecated) | bool | no | false | If true, converts Hash value of record to JSON String. |
61
68
  | insert_id_field | string | no | nil | Use key as `insert_id` of Streaming Insert API parameter. |
69
+ | allow_retry_insert_errors | bool | no | false | Retry inserting rows when an insertErrors response occurs. Note that rows may be inserted in duplicate. |
62
70
  | request_timeout_sec | integer | no | nil | Bigquery API response timeout |
63
71
  | request_open_timeout_sec | integer | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
64
72
  | time_partitioning_type | enum | no (either day) | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
@@ -3,6 +3,7 @@ module Fluent
3
3
  # @abstract
4
4
  class Error < StandardError
5
5
  RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
6
+ RETRYABLE_INSERT_ERRORS_REASON = %w(timeout).freeze
6
7
  RETRYABLE_STATUS_CODE = [500, 503]
7
8
 
8
9
  class << self
@@ -29,6 +30,10 @@ module Fluent
29
30
  RETRYABLE_ERROR_REASON.include?(reason)
30
31
  end
31
32
 
33
+ def retryable_insert_errors_reason?(reason)
34
+ RETRYABLE_INSERT_ERRORS_REASON.include?(reason)
35
+ end
36
+
32
37
  # Guard for instantiation
33
38
  private :new
34
39
  def inherited(subclass)
@@ -1,5 +1,5 @@
1
1
  module Fluent
2
2
  module BigQueryPlugin
3
- VERSION = "0.4.0".freeze
3
+ VERSION = "0.4.1".freeze
4
4
  end
5
5
  end
@@ -84,7 +84,7 @@ module Fluent
84
84
  nil
85
85
  end
86
86
 
87
- def insert_rows(project, dataset, table_id, rows, skip_invalid_rows: false, ignore_unknown_values: false, template_suffix: nil, timeout_sec: nil, open_timeout_sec: 60)
87
+ def insert_rows(project, dataset, table_id, rows, skip_invalid_rows: false, ignore_unknown_values: false, template_suffix: nil, timeout_sec: nil, open_timeout_sec: 60, allow_retry_insert_errors: false)
88
88
  body = {
89
89
  rows: rows,
90
90
  skip_invalid_rows: skip_invalid_rows,
@@ -95,7 +95,20 @@ module Fluent
95
95
  options: {timeout_sec: timeout_sec, open_timeout_sec: open_timeout_sec}
96
96
  })
97
97
  log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
98
- log.warn "insert errors", project_id: project, dataset: dataset, table: table_id, insert_errors: res.insert_errors.to_s if res.insert_errors && !res.insert_errors.empty?
98
+
99
+ if res.insert_errors && !res.insert_errors.empty?
100
+ log.warn "insert errors", project_id: project, dataset: dataset, table: table_id, insert_errors: res.insert_errors.to_s
101
+ if allow_retry_insert_errors
102
+ is_included_any_retryable_insert_error = res.insert_errors.any? do |insert_error|
103
+ insert_error.errors.any? { |error| Fluent::BigQuery::Error.retryable_insert_errors_reason?(error.reason) }
104
+ end
105
+ if is_included_any_retryable_insert_error
106
+ raise Fluent::BigQuery::RetryableError.new("failed to insert into bigquery(insert errors), retry")
107
+ else
108
+ raise Fluent::BigQuery::UnRetryableError.new("failed to insert into bigquery(insert errors), and cannot retry")
109
+ end
110
+ end
111
+ end
99
112
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
100
113
  @client = nil
101
114
 
@@ -166,7 +179,7 @@ module Fluent
166
179
  end
167
180
 
168
181
  if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
169
- wait_load_job(chunk_id, project, dataset, job_id, table_id)
182
+ wait_load_job(chunk_id, project, dataset, job_id, table_id)
170
183
  @num_errors_per_chunk.delete(chunk_id)
171
184
  return
172
185
  end
@@ -121,6 +121,10 @@ module Fluent
121
121
 
122
122
  config_param :method, :enum, list: [:insert, :load], default: :insert, skip_accessor: true
123
123
 
124
+ # allow_retry_insert_errors (insert method only)
125
+ # If insert_id_field is not specified, setting this to true may cause rows to be inserted in duplicate
126
+ config_param :allow_retry_insert_errors, :bool, default: false
127
+
124
128
  # TODO
125
129
  # config_param :row_size_limit, :integer, default: 100*1000 # < 100KB # configurable in google ?
126
130
  # config_param :insert_size_limit, :integer, default: 1000**2 # < 1MB
@@ -423,7 +427,7 @@ module Fluent
423
427
  end
424
428
 
425
429
  def insert(table_id, rows, template_suffix)
426
- writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix)
430
+ writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix, allow_retry_insert_errors: @allow_retry_insert_errors)
427
431
  rescue Fluent::BigQuery::Error => e
428
432
  if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
429
433
  # Table Not Found: Auto Create Table
@@ -55,6 +55,21 @@ class BigQueryOutputTest < Test::Unit::TestCase
55
55
  writer
56
56
  end
57
57
 
58
+ # ref. https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/ea2be47beb32615b2bf69f8a846a684f86c8328c/google-cloud-bigquery/test/google/cloud/bigquery/table_insert_test.rb#L141
59
+ def failure_insert_errors(reason, error_count, insert_error_count)
60
+ error = Google::Apis::BigqueryV2::ErrorProto.new(
61
+ reason: reason
62
+ )
63
+ insert_error = Google::Apis::BigqueryV2::InsertAllTableDataResponse::InsertError.new(
64
+ errors: [].fill(error, 0, error_count)
65
+ )
66
+
67
+ res = Google::Apis::BigqueryV2::InsertAllTableDataResponse.new(
68
+ insert_errors: [].fill(insert_error, 0, insert_error_count)
69
+ )
70
+ return res
71
+ end
72
+
58
73
  def test_configure_table
59
74
  driver = create_driver
60
75
  assert_equal driver.instance.table, 'foo'
@@ -886,6 +901,143 @@ class BigQueryOutputTest < Test::Unit::TestCase
886
901
  driver.instance.shutdown
887
902
  end
888
903
 
904
+ def test_write_with_retryable_insert_errors
905
+ data_input = [
906
+ { "error_count" => 1, "insert_error_count" => 1 },
907
+ { "error_count" => 10, "insert_error_count" => 1 },
908
+ { "error_count" => 10, "insert_error_count" => 10 },
909
+ ]
910
+
911
+ data_input.each do |d|
912
+ entry = {json: {a: "b"}}, {json: {b: "c"}}
913
+ allow_retry_insert_errors = true
914
+ driver = create_driver(<<-CONFIG)
915
+ table foo
916
+ email foo@bar.example
917
+ private_key_path /path/to/key
918
+ project yourproject_id
919
+ dataset yourdataset_id
920
+
921
+ allow_retry_insert_errors #{allow_retry_insert_errors}
922
+
923
+ time_format %s
924
+ time_field time
925
+
926
+ schema [
927
+ {"name": "time", "type": "INTEGER"},
928
+ {"name": "status", "type": "INTEGER"},
929
+ {"name": "bytes", "type": "INTEGER"},
930
+ {"name": "vhost", "type": "STRING"},
931
+ {"name": "path", "type": "STRING"},
932
+ {"name": "method", "type": "STRING"},
933
+ {"name": "protocol", "type": "STRING"},
934
+ {"name": "agent", "type": "STRING"},
935
+ {"name": "referer", "type": "STRING"},
936
+ {"name": "remote", "type": "RECORD", "fields": [
937
+ {"name": "host", "type": "STRING"},
938
+ {"name": "ip", "type": "STRING"},
939
+ {"name": "user", "type": "STRING"}
940
+ ]},
941
+ {"name": "requesttime", "type": "FLOAT"},
942
+ {"name": "bot_access", "type": "BOOLEAN"},
943
+ {"name": "loginsession", "type": "BOOLEAN"}
944
+ ]
945
+ <secondary>
946
+ type file
947
+ path error
948
+ utc
949
+ </secondary>
950
+ CONFIG
951
+
952
+ writer = stub_writer(driver)
953
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
954
+ rows: entry,
955
+ skip_invalid_rows: false,
956
+ ignore_unknown_values: false
957
+ }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
958
+ s = failure_insert_errors("timeout", d["error_count"], d["insert_error_count"])
959
+ s
960
+ end
961
+
962
+ chunk = Fluent::MemoryBufferChunk.new("my.tag")
963
+ entry.each do |e|
964
+ chunk << e.to_msgpack
965
+ end
966
+
967
+ driver.instance.start
968
+ assert_raise Fluent::BigQuery::RetryableError do
969
+ driver.instance.write(chunk)
970
+ end
971
+ driver.instance.shutdown
972
+ end
973
+ end
974
+
975
+ def test_write_with_not_retryable_insert_errors
976
+ data_input = [
977
+ { "allow_retry_insert_errors" => false, "reason" => "timeout" },
978
+ { "allow_retry_insert_errors" => true, "reason" => "stopped" },
979
+ ]
980
+ data_input.each do |d|
981
+ entry = {json: {a: "b"}}, {json: {b: "c"}}
982
+ driver = create_driver(<<-CONFIG)
983
+ table foo
984
+ email foo@bar.example
985
+ private_key_path /path/to/key
986
+ project yourproject_id
987
+ dataset yourdataset_id
988
+
989
+ allow_retry_insert_errors #{d["allow_retry_insert_errors"]}
990
+
991
+ time_format %s
992
+ time_field time
993
+
994
+ schema [
995
+ {"name": "time", "type": "INTEGER"},
996
+ {"name": "status", "type": "INTEGER"},
997
+ {"name": "bytes", "type": "INTEGER"},
998
+ {"name": "vhost", "type": "STRING"},
999
+ {"name": "path", "type": "STRING"},
1000
+ {"name": "method", "type": "STRING"},
1001
+ {"name": "protocol", "type": "STRING"},
1002
+ {"name": "agent", "type": "STRING"},
1003
+ {"name": "referer", "type": "STRING"},
1004
+ {"name": "remote", "type": "RECORD", "fields": [
1005
+ {"name": "host", "type": "STRING"},
1006
+ {"name": "ip", "type": "STRING"},
1007
+ {"name": "user", "type": "STRING"}
1008
+ ]},
1009
+ {"name": "requesttime", "type": "FLOAT"},
1010
+ {"name": "bot_access", "type": "BOOLEAN"},
1011
+ {"name": "loginsession", "type": "BOOLEAN"}
1012
+ ]
1013
+ <secondary>
1014
+ type file
1015
+ path error
1016
+ utc
1017
+ </secondary>
1018
+ CONFIG
1019
+
1020
+ writer = stub_writer(driver)
1021
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
1022
+ rows: entry,
1023
+ skip_invalid_rows: false,
1024
+ ignore_unknown_values: false
1025
+ }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
1026
+ s = failure_insert_errors(d["reason"], 1, 1)
1027
+ s
1028
+ end
1029
+
1030
+ chunk = Fluent::MemoryBufferChunk.new("my.tag")
1031
+ entry.each do |e|
1032
+ chunk << e.to_msgpack
1033
+ end
1034
+
1035
+ driver.instance.start
1036
+ driver.instance.write(chunk)
1037
+ driver.instance.shutdown
1038
+ end
1039
+ end
1040
+
889
1041
  def test_write_for_load
890
1042
  schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
891
1043
  entry = {a: "b"}, {b: "c"}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-01-30 00:00:00.000000000 Z
12
+ date: 2017-03-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake