fluent-plugin-bigquery 2.0.0 → 2.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: da6ea2c72e47fb9193731dd419914e5e0a7974a4a4b7013299547856efa94efe
- data.tar.gz: fc240aa17d7896d58f56e87a1c23dccad6850ecfa580c195c2cd1448ff7ba5f7
+ metadata.gz: b463f412345eb71d1b263bf56e0cd51ebe1c2dacffaa223293edb8d4e5776e73
+ data.tar.gz: f5f7766b2d0f4498239389ef38eb29ef9d20dbe9b118890e8d651b23330d33ca
  SHA512:
- metadata.gz: f115777a0f822c01872d9ee0e9bbc8f7da409dd4f69937a5518d82c302b42c8332775e199819830856f11ce21ec1661577fce29af25a09651e228567414384bf
- data.tar.gz: b2b210f0f04f7e5490dc9853cbd3ccbbf6e295a5d01eebe9052d54afdcbe4694b66cc2ea50693481a79450bdada838c9d6c73f1329f1ce8f14a0abed23dfbf5d
+ metadata.gz: 8d3851b83d9cbc7c802836dc5f5709d2f92009f980a3a6d3566730eea55fdaf697540c0370220441ed1d88687c27eb8677506e9897693469ef4fcb347d1e7825
+ data.tar.gz: 39223f99503c53a812549b4ff8de2a94c3b7db670e6dd9819840d86d561fe68c922f82c18f0201abe8625b2dbf79d0741413d21c56d9d0855b1889b68946a2f8
data/README.md CHANGED
@@ -1,7 +1,5 @@
  # fluent-plugin-bigquery
 
- **This README is for v2.0.0.beta**
-
  [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.
 
  - **Plugin type**: Output
@@ -39,29 +37,30 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 
  #### common
 
- | name | type | required? | placeholder? | default | description |
- | :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
- | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
- | email | string | yes (private_key) | no | nil | GCP Service Account Email |
- | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
- | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
- | json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
- | project | string | yes | yes | nil | |
- | dataset | string | yes | yes | nil | |
- | table | string | yes (either `tables`) | yes | nil | |
- | tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
- | auto_create_table | bool | no | no | false | If true, creates table automatically |
- | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
- | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
- | schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
- | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
- | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
- | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
- | request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
- | request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
- | time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
- | time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition(experimental feature on BigQuery). |
- | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+ | name | type | required? | placeholder? | default | description |
+ | :-------------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+ | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+ | email | string | yes (private_key) | no | nil | GCP Service Account Email |
+ | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
+ | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
+ | json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
+ | project | string | yes | yes | nil | |
+ | dataset | string | yes | yes | nil | |
+ | table | string | yes (either `tables`) | yes | nil | |
+ | tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
+ | auto_create_table | bool | no | no | false | If true, creates table automatically |
+ | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+ | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
+ | schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
+ | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
+ | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
+ | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
+ | request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
+ | request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
+ | time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
+ | time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition(experimental feature on BigQuery). |
+ | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+ | time_partitioning_require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. (experimental feature on BigQuery) |
 
  #### bigquery_insert
 
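For orientation, a minimal sketch of a configuration exercising the new `time_partitioning_require_partition_filter` option, written as a Ruby heredoc in the same `create_driver(<<-CONFIG)` style the tests below use. All names and paths are placeholders, not values from this diff:

```ruby
# Hypothetical configuration (placeholder names and paths throughout),
# mirroring the test-driver style used elsewhere in this gem.
config = <<-CONFIG
  table foo
  email foo@bar.example
  private_key_path /path/to/key
  project yourproject_id
  dataset yourdataset_id
  schema_path /path/to/schema.json

  time_partitioning_type day
  time_partitioning_field time
  time_partitioning_expiration 1h
  time_partitioning_require_partition_filter true
CONFIG
```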
@@ -7,10 +7,9 @@ module Fluent
  RETRYABLE_STATUS_CODE = [500, 502, 503, 504]
 
  class << self
- def wrap(google_api_error, message = nil, force_unretryable: false)
- e = google_api_error
- return UnRetryableError.new(message, e) if force_unretryable
-
+ # @param e [Google::Apis::Error]
+ # @param message [String]
+ def wrap(e, message = nil)
  if retryable_error?(e)
  RetryableError.new(message, e)
  else
@@ -18,12 +17,9 @@ module Fluent
  end
  end
 
- def retryable_error?(google_api_error)
- e = google_api_error
- reason = e.respond_to?(:reason) ? e.reason : nil
-
- retryable_error_reason?(reason) ||
- (e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code))
+ # @param e [Google::Apis::Error]
+ def retryable_error?(e)
+ e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code)
  end
 
  def retryable_error_reason?(reason)
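In short, `wrap` now classifies an error by its class and HTTP status alone; the reason-string inspection and the `force_unretryable` escape hatch are gone. A minimal sketch of a caller, assuming the error classes shown above (this caller is not part of the diff):

```ruby
# Minimal caller sketch (not from this diff): wrap a Google API error and
# branch on retryability. After this change, only Google::Apis::ServerError
# with status 500/502/503/504 yields a RetryableError.
begin
  client.insert_all_table_data(project, dataset, table_id, body, {})
rescue Google::Apis::Error => e
  wrapped = Fluent::BigQuery::Error.wrap(e, "tabledata.insertAll failed")
  raise wrapped unless wrapped.retryable?
  log.warn "retryable BigQuery error, will retry", message: wrapped.message
end
```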
@@ -1,5 +1,5 @@
  module Fluent
  module BigQueryPlugin
- VERSION = "2.0.0".freeze
+ VERSION = "2.1.0".freeze
  end
  end
@@ -34,13 +34,7 @@ module Fluent
  }
  }
 
- if @options[:time_partitioning_type]
- definition[:time_partitioning] = {
- type: @options[:time_partitioning_type].to_s.upcase,
- field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
- expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil
- }.select { |_, value| !value.nil? }
- end
+ definition.merge!(time_partitioning: time_partitioning) if time_partitioning
  client.insert_table(project, dataset, definition, {})
  log.debug "create table", project_id: project, dataset: dataset, table: table_id
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -51,10 +45,9 @@ module Fluent
  return
  end
 
- reason = e.respond_to?(:reason) ? e.reason : nil
- log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message, reason: reason
+ log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message
 
- if Fluent::BigQuery::Error.retryable_error_reason?(reason) && create_table_retry_count < create_table_retry_limit
+ if create_table_retry_count < create_table_retry_limit
  sleep create_table_retry_wait
  create_table_retry_wait *= 2
  create_table_retry_count += 1
@@ -77,14 +70,19 @@ module Fluent
  nil
  end
 
- def insert_rows(project, dataset, table_id, rows, template_suffix: nil)
+ def insert_rows(project, dataset, table_id, rows, schema, template_suffix: nil)
  body = {
  rows: rows,
  skip_invalid_rows: @options[:skip_invalid_rows],
  ignore_unknown_values: @options[:ignore_unknown_values],
  }
  body.merge!(template_suffix: template_suffix) if template_suffix
- res = client.insert_all_table_data(project, dataset, table_id, body, {})
+
+ if @options[:auto_create_table]
+ res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+ else
+ res = client.insert_all_table_data(project, dataset, table_id, body, {})
+ end
  log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
 
  if res.insert_errors && !res.insert_errors.empty?
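Callers must now pass the table schema through, since the writer may need it to create a missing table. A hypothetical call (all argument values are placeholders):

```ruby
# Hypothetical call site (placeholder values): insert_rows now takes the
# schema as a positional argument so auto_create_table can use it on a 404.
writer.insert_rows("yourproject_id", "yourdataset_id", "foo",
                   rows, schema, template_suffix: nil)
```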
@@ -101,8 +99,7 @@ module Fluent
  end
  end
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
- reason = e.respond_to?(:reason) ? e.reason : nil
- error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason }
+ error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message }
  wrapped = Fluent::BigQuery::Error.wrap(e)
  if wrapped.retryable?
  log.warn "tabledata.insertAll API", error_data
@@ -132,9 +129,6 @@ module Fluent
  dataset_id: dataset,
  table_id: table_id,
  },
- schema: {
- fields: fields.to_a,
- },
  write_disposition: "WRITE_APPEND",
  source_format: source_format,
  ignore_unknown_values: @options[:ignore_unknown_values],
@@ -144,17 +138,18 @@ module Fluent
  }
 
  job_id = create_job_id(chunk_id_hex, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
- configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
  configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
 
- # If target table is already exist, omit schema configuration.
- # Because schema changing is easier.
  begin
- if client.get_table(project, dataset, table_id)
- configuration[:configuration][:load].delete(:schema)
+ # Check table existance
+ client.get_table(project, dataset, table_id)
+ rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+ if e.status_code == 404 && /Not Found: Table/i =~ e.message
+ raise Fluent::BigQuery::UnRetryableError.new("Table is not found") unless @options[:auto_create_table]
+ raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
+ configuration[:configuration][:load].merge!(schema: {fields: fields.to_a})
+ configuration[:configuration][:load].merge!(time_partitioning: time_partitioning) if time_partitioning
  end
- rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError
- raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
  end
 
  res = client.insert_job(
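The dropped `CREATE_NEVER` override is what makes this path work: a BigQuery load job's `create_disposition` defaults to `CREATE_IF_NEEDED`, so once the table lookup 404s and the schema (plus any `time_partitioning` settings) is attached, the load job itself creates the missing partitioned table. A sketch of the resulting load configuration, with hypothetical values:

```ruby
# Illustration only (hypothetical values): the load configuration produced
# when the destination table is missing and auto_create_table is enabled.
{
  configuration: {
    load: {
      destination_table: {
        project_id: "yourproject_id",
        dataset_id: "yourdataset_id",
        table_id: "foo",
      },
      write_disposition: "WRITE_APPEND",
      source_format: "NEWLINE_DELIMITED_JSON",
      schema: { fields: fields.to_a },       # attached after the 404
      time_partitioning: time_partitioning,  # attached only if configured
    }
  }
}
```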
@@ -167,19 +162,7 @@ module Fluent
  )
  JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
- reason = e.respond_to?(:reason) ? e.reason : nil
- log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
-
- if @options[:auto_create_table] && e.status_code == 404 && /Not Found: Table/i =~ e.message
- # Table Not Found: Auto Create Table
- create_table(
- project,
- dataset,
- table_id,
- fields,
- )
- raise "table created. send rows next time."
- end
+ log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message
 
  if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
  return JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, job_id)
@@ -317,6 +300,43 @@ module Fluent
  "NEWLINE_DELIMITED_JSON"
  end
  end
+
+ def time_partitioning
+ return @time_partitioning if instance_variable_defined?(:@time_partitioning)
+
+ if @options[:time_partitioning_type]
+ @time_partitioning = {
+ type: @options[:time_partitioning_type].to_s.upcase,
+ field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
+ expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil,
+ require_partition_filter: @options[:time_partitioning_require_partition_filter],
+ }.reject { |_, v| v.nil? }
+ else
+ @time_partitioning
+ end
+ end
+
+ def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+ try_count ||= 1
+ res = client.insert_all_table_data(project, dataset, table_id, body, {})
+ rescue Google::Apis::ClientError => e
+ if e.status_code == 404 && /Not Found: Table/i =~ e.message
+ if try_count == 1
+ # Table Not Found: Auto Create Table
+ create_table(project, dataset, table_id, schema)
+ elsif try_count > 10
+ raise "A new table was created but it is not found."
+ end
+
+ # Retry to insert several times because the created table is not visible from Streaming insert for a little while
+ # cf. https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
+ try_count += 1
+ sleep 5
+ log.debug "Retry to insert rows", project_id: project, dataset: dataset, table: table_id
+ retry
+ end
+ raise
+ end
  end
  end
  end
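Two things worth noting here. The `time_partitioning` helper memoizes its result and rejects nil values, so unset options simply disappear from the hash. And the streaming-insert retry loop sleeps 5 seconds between attempts with a cap of 10 retries, i.e. it waits roughly 50 seconds total for a newly created table to become visible to streaming inserts. An illustration with hypothetical option values (the `3600` corresponds to a `1h` config, converted to milliseconds):

```ruby
# Illustration (hypothetical option values): what the memoized
# time_partitioning helper returns for a typical configuration.
options = {
  time_partitioning_type: :day,
  time_partitioning_field: "time",
  time_partitioning_expiration: 3600,  # seconds (e.g. from "1h")
  time_partitioning_require_partition_filter: true,
}
# Resulting hash:
# { type: "DAY", field: "time", expiration_ms: 3_600_000,
#   require_partition_filter: true }
```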
@@ -69,6 +69,7 @@ module Fluent
  config_param :time_partitioning_type, :enum, list: [:day], default: nil
  config_param :time_partitioning_field, :string, default: nil
  config_param :time_partitioning_expiration, :time, default: nil
+ config_param :time_partitioning_require_partition_filter, :bool, default: false
 
  ## Formatter
  config_section :format do
@@ -139,8 +140,9 @@ module Fluent
  prevent_duplicate_load: @prevent_duplicate_load,
  auto_create_table: @auto_create_table,
  time_partitioning_type: @time_partitioning_type,
- time_partitioning_field: time_partitioning_field,
+ time_partitioning_field: @time_partitioning_field,
  time_partitioning_expiration: @time_partitioning_expiration,
+ time_partitioning_require_partition_filter: @time_partitioning_require_partition_filter,
  timeout_sec: @request_timeout_sec,
  open_timeout_sec: @request_open_timeout_sec,
  })
@@ -96,14 +96,8 @@ module Fluent
  end
 
  def insert(project, dataset, table_id, rows, schema, template_suffix)
- writer.insert_rows(project, dataset, table_id, rows, template_suffix: template_suffix)
+ writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
  rescue Fluent::BigQuery::Error => e
- if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
- # Table Not Found: Auto Create Table
- writer.create_table(project, dataset, table_id, schema)
- raise "table created. send rows next time."
- end
-
  raise if e.retryable?
 
  if @secondary
@@ -196,6 +196,7 @@ module Fluent
  rescue => e
  log.error("unexpected error while polling", error: e)
  log.error_backtrace
+ rollback_write(job_reference.chunk_id)
  end
  end
 
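`rollback_write` is the Fluentd v1 output API for returning an in-flight chunk to the buffer, so a chunk whose load-job polling fails unexpectedly is now retried later instead of being silently dropped. A sketch of the effect, assuming standard `Fluent::Plugin::Output` delayed-commit semantics (not code from this diff):

```ruby
# Sketch (assumed Fluent::Plugin::Output semantics, not from this diff):
# a delayed-commit plugin polls the load job and either commits or rolls
# back the chunk. Rolling back re-enqueues it for a later retry.
def poll_job(job_reference)
  # ... check job status via the BigQuery API ...
  commit_write(job_reference.chunk_id)   # success: chunk is done
rescue => e
  log.error("unexpected error while polling", error: e)
  rollback_write(job_reference.chunk_id) # failure: chunk will be retried
end
```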
@@ -121,7 +121,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  driver = create_driver
 
  stub_writer do |writer|
- mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: hash_including(entry)}], template_suffix: nil)
  mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
  rows: [{json: hash_including(entry)}],
  skip_invalid_rows: false,
@@ -346,10 +345,24 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  CONFIG
 
  stub_writer do |writer|
- mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}], template_suffix: nil) do
- raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
- end
- mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
+ body = {
+ rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
+ skip_invalid_rows: false,
+ ignore_unknown_values: false,
+ }
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+ raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+ end.at_least(1)
+ mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+ mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+ table_reference: {
+ table_id: 'foo',
+ },
+ schema: {
+ fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+ },
+ }, {})
  end
 
  assert_raise(RuntimeError) do
@@ -403,13 +416,34 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  time_partitioning_type day
  time_partitioning_field time
  time_partitioning_expiration 1h
+ time_partitioning_require_partition_filter true
  CONFIG
 
  stub_writer do |writer|
- mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) do
- raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
- end
- mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
+ body = {
+ rows: [message],
+ skip_invalid_rows: false,
+ ignore_unknown_values: false,
+ }
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+ raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+ end.at_least(1)
+ mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+ mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+ table_reference: {
+ table_id: 'foo',
+ },
+ schema: {
+ fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+ },
+ time_partitioning: {
+ type: 'DAY',
+ field: 'time',
+ expiration_ms: 3600000,
+ require_partition_filter: true
+ },
+ }, {})
  end
 
  assert_raise(RuntimeError) do
@@ -39,10 +39,8 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  writer
  end
  end
-
- def test_write
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
+ def test_write
  response_stub = stub!
 
  driver = create_driver
@@ -60,9 +58,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  dataset_id: 'yourdataset_id',
  table_id: 'foo',
  },
- schema: {
- fields: schema_fields,
- },
  write_disposition: "WRITE_APPEND",
  source_format: "NEWLINE_DELIMITED_JSON",
  ignore_unknown_values: false,
@@ -99,7 +94,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  schema_path #{SCHEMA_PATH}
  prevent_duplicate_load true
  CONFIG
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
  response_stub = stub!
  stub_writer do |writer|
@@ -116,9 +110,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  dataset_id: 'yourdataset_id',
  table_id: 'foo',
  },
- schema: {
- fields: schema_fields,
- },
  write_disposition: "WRITE_APPEND",
  source_format: "NEWLINE_DELIMITED_JSON",
  ignore_unknown_values: false,
@@ -138,7 +129,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
  def test_write_with_retryable_error
  driver = create_driver
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
  driver.instance_start
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
@@ -158,9 +148,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  dataset_id: 'yourdataset_id',
  table_id: 'foo',
  },
- schema: {
- fields: schema_fields,
- },
  write_disposition: "WRITE_APPEND",
  source_format: "NEWLINE_DELIMITED_JSON",
  ignore_unknown_values: false,
@@ -225,7 +212,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  utc
  </secondary>
  CONFIG
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
  driver.instance_start
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
@@ -245,9 +231,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  dataset_id: 'yourdataset_id',
  table_id: 'foo',
  },
- schema: {
- fields: schema_fields,
- },
  write_disposition: "WRITE_APPEND",
  source_format: "NEWLINE_DELIMITED_JSON",
  ignore_unknown_values: false,
@@ -289,6 +272,61 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  driver.instance_shutdown
  end
 
+ def test_write_with_auto_create_table
+ driver = create_driver(<<-CONFIG)
+ table foo
+ email foo@bar.example
+ private_key_path /path/to/key
+ project yourproject_id
+ dataset yourdataset_id
+
+ <buffer>
+ @type memory
+ </buffer>
+
+ <inject>
+ time_format %s
+ time_key time
+ </inject>
+
+ auto_create_table true
+ schema_path #{SCHEMA_PATH}
+ CONFIG
+
+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+ stub_writer do |writer|
+ mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') do
+ raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+ end
+
+ mock(writer.client).insert_job('yourproject_id', {
+ configuration: {
+ load: {
+ destination_table: {
+ project_id: 'yourproject_id',
+ dataset_id: 'yourdataset_id',
+ table_id: 'foo',
+ },
+ write_disposition: "WRITE_APPEND",
+ source_format: "NEWLINE_DELIMITED_JSON",
+ ignore_unknown_values: false,
+ max_bad_records: 0,
+ schema: {
+ fields: schema_fields,
+ },
+ }
+ }
+ }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+ stub!.job_reference.stub!.job_id { "dummy_job_id" }
+ end
+ end
+
+ driver.run do
+ driver.feed("tag", Time.now.to_i, {"a" => "b"})
+ end
+ end
+
  private
 
  def create_response_stub(response)
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-bigquery
  version: !ruby/object:Gem::Version
- version: 2.0.0
+ version: 2.1.0
  platform: ruby
  authors:
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-09-05 00:00:00.000000000 Z
+ date: 2018-11-05 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rake