fluent-plugin-bigquery 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7d4074dc903c423acbebd56b2b4d6fc0ce110510
4
- data.tar.gz: 4d17cd1b2ee3768b83845105b5b9a714835e0a4c
3
+ metadata.gz: 75db2952171316995000122029bb4e4f3eeb0a45
4
+ data.tar.gz: 43d6cff7aaf69f06288f006d4f2bd97300b59dd9
5
5
  SHA512:
6
- metadata.gz: 7f99c64e394650b7eac03e6872dcfafb36981f48a726d8aba9d87fc83b45329ebac925c7d1239113995597e4697d8afcf1f9397c8583d0a3bbe11d47aedd668b
7
- data.tar.gz: 52554bcd622e75486fc8a10ceeebd8af958ac5523869f2ae964324c1348b734fcef00c4232766484a0d3112c15b50eb06f334b6efaf2cd55394321139bc1df9e
6
+ metadata.gz: 0157b43c59d7ac17e50051a261cf21f036bc1c5827a6a102d4747d7f66293fb8e1f733bebcabcd652afdf2b90a9abccbf622763d3e7cc2d9b34a382d75c4adc3
7
+ data.tar.gz: 169d3bf4a140f4dc3e5fd77707f2ed9d45ed989b23c217555ea22ef8275a1fed36129169e7b523259d767f745cc892d31605393bc40af244448b7ca81f6d62d8
data/README.md CHANGED
@@ -17,6 +17,13 @@ OAuth flow for installed applications.
17
17
  ## Notice
18
18
  If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
19
19
 
20
+ ## With docker image
21
+ If you use official alpine based fluentd docker image (https://github.com/fluent/fluentd-docker-image),
22
+ you need to install the `bigdecimal` gem in your own Dockerfile.
23
+ This is because the alpine-based image contains only a minimal Ruby environment in order to reduce image size.
24
+ Also, in most cases, the dependency on an embedded gem is not written in the gemspec.
25
+ This is because an embedded gem dependency sometimes restricts the Ruby environment.
26
+
20
27
  ## Configuration
21
28
 
22
29
  ### Options
@@ -59,6 +66,7 @@ If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
59
66
  | replace_record_key_regexp{1-10} | string | no | nil | see examples. |
60
67
  | convert_hash_to_json (deprecated) | bool | no | false | If true, converts Hash value of record to JSON String. |
61
68
  | insert_id_field | string | no | nil | Use key as `insert_id` of Streaming Insert API parameter. |
69
+ | allow_retry_insert_errors | bool | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
62
70
  | request_timeout_sec | integer | no | nil | Bigquery API response timeout |
63
71
  | request_open_timeout_sec | integer | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
64
72
  | time_partitioning_type | enum | no (either day) | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
@@ -3,6 +3,7 @@ module Fluent
3
3
  # @abstract
4
4
  class Error < StandardError
5
5
  RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
6
+ RETRYABLE_INSERT_ERRORS_REASON = %w(timeout).freeze
6
7
  RETRYABLE_STATUS_CODE = [500, 503]
7
8
 
8
9
  class << self
@@ -29,6 +30,10 @@ module Fluent
29
30
  RETRYABLE_ERROR_REASON.include?(reason)
30
31
  end
31
32
 
33
+ def retryable_insert_errors_reason?(reason)
34
+ RETRYABLE_INSERT_ERRORS_REASON.include?(reason)
35
+ end
36
+
32
37
  # Guard for instantiation
33
38
  private :new
34
39
  def inherited(subclass)
@@ -1,5 +1,5 @@
1
1
  module Fluent
2
2
  module BigQueryPlugin
3
- VERSION = "0.4.0".freeze
3
+ VERSION = "0.4.1".freeze
4
4
  end
5
5
  end
@@ -84,7 +84,7 @@ module Fluent
84
84
  nil
85
85
  end
86
86
 
87
- def insert_rows(project, dataset, table_id, rows, skip_invalid_rows: false, ignore_unknown_values: false, template_suffix: nil, timeout_sec: nil, open_timeout_sec: 60)
87
+ def insert_rows(project, dataset, table_id, rows, skip_invalid_rows: false, ignore_unknown_values: false, template_suffix: nil, timeout_sec: nil, open_timeout_sec: 60, allow_retry_insert_errors: false)
88
88
  body = {
89
89
  rows: rows,
90
90
  skip_invalid_rows: skip_invalid_rows,
@@ -95,7 +95,20 @@ module Fluent
95
95
  options: {timeout_sec: timeout_sec, open_timeout_sec: open_timeout_sec}
96
96
  })
97
97
  log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
98
- log.warn "insert errors", project_id: project, dataset: dataset, table: table_id, insert_errors: res.insert_errors.to_s if res.insert_errors && !res.insert_errors.empty?
98
+
99
+ if res.insert_errors && !res.insert_errors.empty?
100
+ log.warn "insert errors", project_id: project, dataset: dataset, table: table_id, insert_errors: res.insert_errors.to_s
101
+ if allow_retry_insert_errors
102
+ is_included_any_retryable_insert_error = res.insert_errors.any? do |insert_error|
103
+ insert_error.errors.any? { |error| Fluent::BigQuery::Error.retryable_insert_errors_reason?(error.reason) }
104
+ end
105
+ if is_included_any_retryable_insert_error
106
+ raise Fluent::BigQuery::RetryableError.new("failed to insert into bigquery(insert errors), retry")
107
+ else
108
+ raise Fluent::BigQuery::UnRetryableError.new("failed to insert into bigquery(insert errors), and cannot retry")
109
+ end
110
+ end
111
+ end
99
112
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
100
113
  @client = nil
101
114
 
@@ -166,7 +179,7 @@ module Fluent
166
179
  end
167
180
 
168
181
  if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
169
- wait_load_job(chunk_id, project, dataset, job_id, table_id)
182
+ wait_load_job(chunk_id, project, dataset, job_id, table_id)
170
183
  @num_errors_per_chunk.delete(chunk_id)
171
184
  return
172
185
  end
@@ -121,6 +121,10 @@ module Fluent
121
121
 
122
122
  config_param :method, :enum, list: [:insert, :load], default: :insert, skip_accessor: true
123
123
 
124
+ # allow_retry_insert_errors (only insert)
125
+ # If insert_id_field is not specified, true means to allow duplicate rows
126
+ config_param :allow_retry_insert_errors, :bool, default: false
127
+
124
128
  # TODO
125
129
  # config_param :row_size_limit, :integer, default: 100*1000 # < 100KB # configurable in google ?
126
130
  # config_param :insert_size_limit, :integer, default: 1000**2 # < 1MB
@@ -423,7 +427,7 @@ module Fluent
423
427
  end
424
428
 
425
429
  def insert(table_id, rows, template_suffix)
426
- writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix)
430
+ writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix, allow_retry_insert_errors: @allow_retry_insert_errors)
427
431
  rescue Fluent::BigQuery::Error => e
428
432
  if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
429
433
  # Table Not Found: Auto Create Table
@@ -55,6 +55,21 @@ class BigQueryOutputTest < Test::Unit::TestCase
55
55
  writer
56
56
  end
57
57
 
58
+ # ref. https://github.com/GoogleCloudPlatform/google-cloud-ruby/blob/ea2be47beb32615b2bf69f8a846a684f86c8328c/google-cloud-bigquery/test/google/cloud/bigquery/table_insert_test.rb#L141
59
+ def failure_insert_errors(reason, error_count, insert_error_count)
60
+ error = Google::Apis::BigqueryV2::ErrorProto.new(
61
+ reason: reason
62
+ )
63
+ insert_error = Google::Apis::BigqueryV2::InsertAllTableDataResponse::InsertError.new(
64
+ errors: [].fill(error, 0, error_count)
65
+ )
66
+
67
+ res = Google::Apis::BigqueryV2::InsertAllTableDataResponse.new(
68
+ insert_errors: [].fill(insert_error, 0, insert_error_count)
69
+ )
70
+ return res
71
+ end
72
+
58
73
  def test_configure_table
59
74
  driver = create_driver
60
75
  assert_equal driver.instance.table, 'foo'
@@ -886,6 +901,143 @@ class BigQueryOutputTest < Test::Unit::TestCase
886
901
  driver.instance.shutdown
887
902
  end
888
903
 
904
+ def test_write_with_retryable_insert_errors
905
+ data_input = [
906
+ { "error_count" => 1, "insert_error_count" => 1 },
907
+ { "error_count" => 10, "insert_error_count" => 1 },
908
+ { "error_count" => 10, "insert_error_count" => 10 },
909
+ ]
910
+
911
+ data_input.each do |d|
912
+ entry = {json: {a: "b"}}, {json: {b: "c"}}
913
+ allow_retry_insert_errors = true
914
+ driver = create_driver(<<-CONFIG)
915
+ table foo
916
+ email foo@bar.example
917
+ private_key_path /path/to/key
918
+ project yourproject_id
919
+ dataset yourdataset_id
920
+
921
+ allow_retry_insert_errors #{allow_retry_insert_errors}
922
+
923
+ time_format %s
924
+ time_field time
925
+
926
+ schema [
927
+ {"name": "time", "type": "INTEGER"},
928
+ {"name": "status", "type": "INTEGER"},
929
+ {"name": "bytes", "type": "INTEGER"},
930
+ {"name": "vhost", "type": "STRING"},
931
+ {"name": "path", "type": "STRING"},
932
+ {"name": "method", "type": "STRING"},
933
+ {"name": "protocol", "type": "STRING"},
934
+ {"name": "agent", "type": "STRING"},
935
+ {"name": "referer", "type": "STRING"},
936
+ {"name": "remote", "type": "RECORD", "fields": [
937
+ {"name": "host", "type": "STRING"},
938
+ {"name": "ip", "type": "STRING"},
939
+ {"name": "user", "type": "STRING"}
940
+ ]},
941
+ {"name": "requesttime", "type": "FLOAT"},
942
+ {"name": "bot_access", "type": "BOOLEAN"},
943
+ {"name": "loginsession", "type": "BOOLEAN"}
944
+ ]
945
+ <secondary>
946
+ type file
947
+ path error
948
+ utc
949
+ </secondary>
950
+ CONFIG
951
+
952
+ writer = stub_writer(driver)
953
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
954
+ rows: entry,
955
+ skip_invalid_rows: false,
956
+ ignore_unknown_values: false
957
+ }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
958
+ s = failure_insert_errors("timeout", d["error_count"], d["insert_error_count"])
959
+ s
960
+ end
961
+
962
+ chunk = Fluent::MemoryBufferChunk.new("my.tag")
963
+ entry.each do |e|
964
+ chunk << e.to_msgpack
965
+ end
966
+
967
+ driver.instance.start
968
+ assert_raise Fluent::BigQuery::RetryableError do
969
+ driver.instance.write(chunk)
970
+ end
971
+ driver.instance.shutdown
972
+ end
973
+ end
974
+
975
+ def test_write_with_not_retryable_insert_errors
976
+ data_input = [
977
+ { "allow_retry_insert_errors" => false, "reason" => "timeout" },
978
+ { "allow_retry_insert_errors" => true, "reason" => "stopped" },
979
+ ]
980
+ data_input.each do |d|
981
+ entry = {json: {a: "b"}}, {json: {b: "c"}}
982
+ driver = create_driver(<<-CONFIG)
983
+ table foo
984
+ email foo@bar.example
985
+ private_key_path /path/to/key
986
+ project yourproject_id
987
+ dataset yourdataset_id
988
+
989
+ allow_retry_insert_errors #{d["allow_retry_insert_errors"]}
990
+
991
+ time_format %s
992
+ time_field time
993
+
994
+ schema [
995
+ {"name": "time", "type": "INTEGER"},
996
+ {"name": "status", "type": "INTEGER"},
997
+ {"name": "bytes", "type": "INTEGER"},
998
+ {"name": "vhost", "type": "STRING"},
999
+ {"name": "path", "type": "STRING"},
1000
+ {"name": "method", "type": "STRING"},
1001
+ {"name": "protocol", "type": "STRING"},
1002
+ {"name": "agent", "type": "STRING"},
1003
+ {"name": "referer", "type": "STRING"},
1004
+ {"name": "remote", "type": "RECORD", "fields": [
1005
+ {"name": "host", "type": "STRING"},
1006
+ {"name": "ip", "type": "STRING"},
1007
+ {"name": "user", "type": "STRING"}
1008
+ ]},
1009
+ {"name": "requesttime", "type": "FLOAT"},
1010
+ {"name": "bot_access", "type": "BOOLEAN"},
1011
+ {"name": "loginsession", "type": "BOOLEAN"}
1012
+ ]
1013
+ <secondary>
1014
+ type file
1015
+ path error
1016
+ utc
1017
+ </secondary>
1018
+ CONFIG
1019
+
1020
+ writer = stub_writer(driver)
1021
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
1022
+ rows: entry,
1023
+ skip_invalid_rows: false,
1024
+ ignore_unknown_values: false
1025
+ }, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
1026
+ s = failure_insert_errors(d["reason"], 1, 1)
1027
+ s
1028
+ end
1029
+
1030
+ chunk = Fluent::MemoryBufferChunk.new("my.tag")
1031
+ entry.each do |e|
1032
+ chunk << e.to_msgpack
1033
+ end
1034
+
1035
+ driver.instance.start
1036
+ driver.instance.write(chunk)
1037
+ driver.instance.shutdown
1038
+ end
1039
+ end
1040
+
889
1041
  def test_write_for_load
890
1042
  schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
891
1043
  entry = {a: "b"}, {b: "c"}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-01-30 00:00:00.000000000 Z
12
+ date: 2017-03-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake