fluent-plugin-bigquery 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: da6ea2c72e47fb9193731dd419914e5e0a7974a4a4b7013299547856efa94efe
-  data.tar.gz: fc240aa17d7896d58f56e87a1c23dccad6850ecfa580c195c2cd1448ff7ba5f7
+  metadata.gz: b463f412345eb71d1b263bf56e0cd51ebe1c2dacffaa223293edb8d4e5776e73
+  data.tar.gz: f5f7766b2d0f4498239389ef38eb29ef9d20dbe9b118890e8d651b23330d33ca
 SHA512:
-  metadata.gz: f115777a0f822c01872d9ee0e9bbc8f7da409dd4f69937a5518d82c302b42c8332775e199819830856f11ce21ec1661577fce29af25a09651e228567414384bf
-  data.tar.gz: b2b210f0f04f7e5490dc9853cbd3ccbbf6e295a5d01eebe9052d54afdcbe4694b66cc2ea50693481a79450bdada838c9d6c73f1329f1ce8f14a0abed23dfbf5d
+  metadata.gz: 8d3851b83d9cbc7c802836dc5f5709d2f92009f980a3a6d3566730eea55fdaf697540c0370220441ed1d88687c27eb8677506e9897693469ef4fcb347d1e7825
+  data.tar.gz: 39223f99503c53a812549b4ff8de2a94c3b7db670e6dd9819840d86d561fe68c922f82c18f0201abe8625b2dbf79d0741413d21c56d9d0855b1889b68946a2f8
data/README.md CHANGED
@@ -1,7 +1,5 @@
 # fluent-plugin-bigquery
 
-**This README is for v2.0.0.beta**
-
 [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.
 
 - **Plugin type**: Output
@@ -39,29 +37,30 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 
 #### common
 
-| name | type | required? | placeholder? | default | description |
-| :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
-| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
-| email | string | yes (private_key) | no | nil | GCP Service Account Email |
-| private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
-| private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
-| json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
-| project | string | yes | yes | nil | |
-| dataset | string | yes | yes | nil | |
-| table | string | yes (either `tables`) | yes | nil | |
-| tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
-| auto_create_table | bool | no | no | false | If true, creates table automatically |
-| ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
-| schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
-| schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
-| fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
-| fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
-| schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
-| request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
-| request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
-| time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
-| time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition(experimental feature on BigQuery). |
-| time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+| name | type | required? | placeholder? | default | description |
+| :-------------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+| email | string | yes (private_key) | no | nil | GCP Service Account Email |
+| private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
+| private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
+| json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
+| project | string | yes | yes | nil | |
+| dataset | string | yes | yes | nil | |
+| table | string | yes (either `tables`) | yes | nil | |
+| tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
+| auto_create_table | bool | no | no | false | If true, creates table automatically |
+| ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+| schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
+| schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
+| fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
+| fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
+| schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
+| request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
+| request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
+| time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
+| time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition(experimental feature on BigQuery). |
+| time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+| time_partitioning_require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. (experimental feature on BigQuery) |
 
 #### bigquery_insert
 
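As a quick orientation for the `common` table above, a minimal `bigquery_insert` configuration that exercises the time-partitioning options, including the new `time_partitioning_require_partition_filter`, might look like the sketch below. The project, dataset, table, and key paths are placeholders, not values taken from this release.

```
<match dummy>
  @type bigquery_insert

  auth_method json_key
  json_key /path/to/keyfile.json

  project yourproject_id
  dataset yourdataset_id
  table foo

  auto_create_table true
  schema_path /path/to/schema.json

  time_partitioning_type day
  time_partitioning_field time
  time_partitioning_expiration 1h
  time_partitioning_require_partition_filter true
</match>
```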
data/lib/fluent/plugin/bigquery/errors.rb CHANGED
@@ -7,10 +7,9 @@ module Fluent
     RETRYABLE_STATUS_CODE = [500, 502, 503, 504]
 
     class << self
-      def wrap(google_api_error, message = nil, force_unretryable: false)
-        e = google_api_error
-        return UnRetryableError.new(message, e) if force_unretryable
-
+      # @param e [Google::Apis::Error]
+      # @param message [String]
+      def wrap(e, message = nil)
         if retryable_error?(e)
           RetryableError.new(message, e)
         else
@@ -18,12 +17,9 @@ module Fluent
         end
       end
 
-      def retryable_error?(google_api_error)
-        e = google_api_error
-        reason = e.respond_to?(:reason) ? e.reason : nil
-
-        retryable_error_reason?(reason) ||
-          (e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code))
+      # @param e [Google::Apis::Error]
+      def retryable_error?(e)
+        e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code)
       end
 
       def retryable_error_reason?(reason)
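Net effect of the two rewritten methods: `wrap` no longer supports `force_unretryable`, and only a `Google::Apis::ServerError` whose status code is in `RETRYABLE_STATUS_CODE` is classified as retryable; the reason-string heuristics drop out of this path. A minimal sketch of the resulting classification, assuming the plugin's error classes and google-api-client are loaded (the specific status codes are just examples; both constructors match signatures used elsewhere in this diff):

```ruby
# Assumes Fluent::BigQuery::Error and the Google::Apis error classes are loaded.
server_error = Google::Apis::ServerError.new("backendError", status_code: 503)
Fluent::BigQuery::Error.wrap(server_error).retryable?
# => true, because 503 is in RETRYABLE_STATUS_CODE

client_error = Google::Apis::ClientError.new("notFound", status_code: 404)
Fluent::BigQuery::Error.wrap(client_error).retryable?
# => false, a ClientError is never retryable under the new rule
```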
data/lib/fluent/plugin/bigquery/version.rb CHANGED
@@ -1,5 +1,5 @@
 module Fluent
   module BigQueryPlugin
-    VERSION = "2.0.0".freeze
+    VERSION = "2.1.0".freeze
   end
 end
data/lib/fluent/plugin/bigquery/writer.rb CHANGED
@@ -34,13 +34,7 @@ module Fluent
           }
         }
 
-        if @options[:time_partitioning_type]
-          definition[:time_partitioning] = {
-            type: @options[:time_partitioning_type].to_s.upcase,
-            field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
-            expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil
-          }.select { |_, value| !value.nil? }
-        end
+        definition.merge!(time_partitioning: time_partitioning) if time_partitioning
         client.insert_table(project, dataset, definition, {})
         log.debug "create table", project_id: project, dataset: dataset, table: table_id
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -51,10 +45,9 @@ module Fluent
           return
         end
 
-        reason = e.respond_to?(:reason) ? e.reason : nil
-        log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message, reason: reason
+        log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message
 
-        if Fluent::BigQuery::Error.retryable_error_reason?(reason) && create_table_retry_count < create_table_retry_limit
+        if create_table_retry_count < create_table_retry_limit
           sleep create_table_retry_wait
           create_table_retry_wait *= 2
           create_table_retry_count += 1
@@ -77,14 +70,19 @@ module Fluent
         nil
       end
 
-      def insert_rows(project, dataset, table_id, rows, template_suffix: nil)
+      def insert_rows(project, dataset, table_id, rows, schema, template_suffix: nil)
         body = {
           rows: rows,
           skip_invalid_rows: @options[:skip_invalid_rows],
           ignore_unknown_values: @options[:ignore_unknown_values],
         }
         body.merge!(template_suffix: template_suffix) if template_suffix
-        res = client.insert_all_table_data(project, dataset, table_id, body, {})
+
+        if @options[:auto_create_table]
+          res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+        else
+          res = client.insert_all_table_data(project, dataset, table_id, body, {})
+        end
         log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
 
         if res.insert_errors && !res.insert_errors.empty?
@@ -101,8 +99,7 @@ module Fluent
           end
         end
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-        reason = e.respond_to?(:reason) ? e.reason : nil
-        error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason }
+        error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message }
         wrapped = Fluent::BigQuery::Error.wrap(e)
         if wrapped.retryable?
           log.warn "tabledata.insertAll API", error_data
@@ -132,9 +129,6 @@ module Fluent
             dataset_id: dataset,
             table_id: table_id,
           },
-          schema: {
-            fields: fields.to_a,
-          },
           write_disposition: "WRITE_APPEND",
           source_format: source_format,
           ignore_unknown_values: @options[:ignore_unknown_values],
@@ -144,17 +138,18 @@ module Fluent
         }
 
         job_id = create_job_id(chunk_id_hex, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
-        configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
         configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
 
-        # If target table is already exist, omit schema configuration.
-        # Because schema changing is easier.
         begin
-          if client.get_table(project, dataset, table_id)
-            configuration[:configuration][:load].delete(:schema)
+          # Check table existance
+          client.get_table(project, dataset, table_id)
+        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+          if e.status_code == 404 && /Not Found: Table/i =~ e.message
+            raise Fluent::BigQuery::UnRetryableError.new("Table is not found") unless @options[:auto_create_table]
+            raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
+            configuration[:configuration][:load].merge!(schema: {fields: fields.to_a})
+            configuration[:configuration][:load].merge!(time_partitioning: time_partitioning) if time_partitioning
           end
-        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError
-          raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
         end
 
         res = client.insert_job(
@@ -167,19 +162,7 @@ module Fluent
         )
         JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-        reason = e.respond_to?(:reason) ? e.reason : nil
-        log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
-
-        if @options[:auto_create_table] && e.status_code == 404 && /Not Found: Table/i =~ e.message
-          # Table Not Found: Auto Create Table
-          create_table(
-            project,
-            dataset,
-            table_id,
-            fields,
-          )
-          raise "table created. send rows next time."
-        end
+        log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message
 
         if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
           return JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, job_id)
@@ -317,6 +300,43 @@ module Fluent
           "NEWLINE_DELIMITED_JSON"
         end
       end
+
+      def time_partitioning
+        return @time_partitioning if instance_variable_defined?(:@time_partitioning)
+
+        if @options[:time_partitioning_type]
+          @time_partitioning = {
+            type: @options[:time_partitioning_type].to_s.upcase,
+            field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
+            expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil,
+            require_partition_filter: @options[:time_partitioning_require_partition_filter],
+          }.reject { |_, v| v.nil? }
+        else
+          @time_partitioning
+        end
+      end
+
+      def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+        try_count ||= 1
+        res = client.insert_all_table_data(project, dataset, table_id, body, {})
+      rescue Google::Apis::ClientError => e
+        if e.status_code == 404 && /Not Found: Table/i =~ e.message
+          if try_count == 1
+            # Table Not Found: Auto Create Table
+            create_table(project, dataset, table_id, schema)
+          elsif try_count > 10
+            raise "A new table was created but it is not found."
+          end
+
+          # Retry to insert several times because the created table is not visible from Streaming insert for a little while
+          # cf. https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
+          try_count += 1
+          sleep 5
+          log.debug "Retry to insert rows", project_id: project, dataset: dataset, table: table_id
+          retry
+        end
+        raise
+      end
     end
   end
 end
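The new `insert_all_table_data_with_create_table` folds table auto-creation into the streaming-insert path: on the first `404 Not Found: Table` it creates the table, then retries the insert every 5 seconds for up to 10 further attempts, because a newly created table may not be visible to streaming inserts right away (see the linked troubleshooting page). A self-contained sketch of that retry shape, with hypothetical stand-ins for the plugin's client calls:

```ruby
# TableNotFound, insert, and create are hypothetical stand-ins; only the
# retry pattern mirrors the method added above.
class TableNotFound < StandardError; end

def insert_with_auto_create(insert:, create:, max_tries: 10, wait: 5)
  try_count ||= 1            # a local survives `retry`, so it counts attempts
  insert.call
rescue TableNotFound
  create.call if try_count == 1                      # create only once
  raise "table created but still not visible" if try_count > max_tries
  try_count += 1
  sleep wait                 # give the backend time to expose the new table
  retry
end

# Example run: the insert succeeds on the third attempt.
attempts = 0
insert_with_auto_create(
  insert: -> { attempts += 1; raise TableNotFound if attempts < 3 },
  create: -> { puts "creating table" },
  wait: 0
)
```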
data/lib/fluent/plugin/out_bigquery_base.rb CHANGED
@@ -69,6 +69,7 @@ module Fluent
     config_param :time_partitioning_type, :enum, list: [:day], default: nil
     config_param :time_partitioning_field, :string, default: nil
     config_param :time_partitioning_expiration, :time, default: nil
+    config_param :time_partitioning_require_partition_filter, :bool, default: false
 
     ## Formatter
     config_section :format do
@@ -139,8 +140,9 @@ module Fluent
         prevent_duplicate_load: @prevent_duplicate_load,
         auto_create_table: @auto_create_table,
         time_partitioning_type: @time_partitioning_type,
-        time_partitioning_field: time_partitioning_field,
+        time_partitioning_field: @time_partitioning_field,
         time_partitioning_expiration: @time_partitioning_expiration,
+        time_partitioning_require_partition_filter: @time_partitioning_require_partition_filter,
         timeout_sec: @request_timeout_sec,
         open_timeout_sec: @request_open_timeout_sec,
       })
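For reference, with the configuration used in the updated insert test further down (`time_partitioning_type day`, `time_partitioning_field time`, `time_partitioning_expiration 1h`, and the new flag set to true), the writer's `time_partitioning` helper yields a hash like the following, which is attached to both `tables.insert` and load-job payloads:

```ruby
{
  type: "DAY",                     # time_partitioning_type, upcased
  field: "time",                   # time_partitioning_field
  expiration_ms: 3600000,          # 1h in seconds (3600) * 1000
  require_partition_filter: true,  # the new option
}
```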
data/lib/fluent/plugin/out_bigquery_insert.rb CHANGED
@@ -96,14 +96,8 @@ module Fluent
     end
 
     def insert(project, dataset, table_id, rows, schema, template_suffix)
-      writer.insert_rows(project, dataset, table_id, rows, template_suffix: template_suffix)
+      writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
     rescue Fluent::BigQuery::Error => e
-      if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
-        # Table Not Found: Auto Create Table
-        writer.create_table(project, dataset, table_id, schema)
-        raise "table created. send rows next time."
-      end
-
       raise if e.retryable?
 
       if @secondary
data/lib/fluent/plugin/out_bigquery_load.rb CHANGED
@@ -196,6 +196,7 @@ module Fluent
       rescue => e
         log.error("unexpected error while polling", error: e)
         log.error_backtrace
+        rollback_write(job_reference.chunk_id)
       end
     end
 
data/test/plugin/test_out_bigquery_insert.rb CHANGED
@@ -121,7 +121,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     driver = create_driver
 
     stub_writer do |writer|
-      mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: hash_including(entry)}], template_suffix: nil)
       mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
         rows: [{json: hash_including(entry)}],
         skip_invalid_rows: false,
@@ -346,10 +345,24 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     CONFIG
 
     stub_writer do |writer|
-      mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}], template_suffix: nil) do
-        raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
-      end
-      mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
+      body = {
+        rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false,
+      }
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+      end.at_least(1)
+      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+        table_reference: {
+          table_id: 'foo',
+        },
+        schema: {
+          fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+        },
+      }, {})
     end
 
     assert_raise(RuntimeError) do
@@ -403,13 +416,34 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     time_partitioning_type day
     time_partitioning_field time
     time_partitioning_expiration 1h
+    time_partitioning_require_partition_filter true
     CONFIG
 
     stub_writer do |writer|
-      mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) do
-        raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
-      end
-      mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
+      body = {
+        rows: [message],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false,
+      }
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+      end.at_least(1)
+      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+        table_reference: {
+          table_id: 'foo',
+        },
+        schema: {
+          fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+        },
+        time_partitioning: {
+          type: 'DAY',
+          field: 'time',
+          expiration_ms: 3600000,
+          require_partition_filter: true
+        },
+      }, {})
     end
 
     assert_raise(RuntimeError) do
data/test/plugin/test_out_bigquery_load.rb CHANGED
@@ -39,10 +39,8 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
       writer
     end
   end
-
-  def test_write
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
+  def test_write
     response_stub = stub!
 
     driver = create_driver
@@ -60,9 +58,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
             dataset_id: 'yourdataset_id',
             table_id: 'foo',
           },
-          schema: {
-            fields: schema_fields,
-          },
           write_disposition: "WRITE_APPEND",
           source_format: "NEWLINE_DELIMITED_JSON",
           ignore_unknown_values: false,
@@ -99,7 +94,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
       schema_path #{SCHEMA_PATH}
       prevent_duplicate_load true
     CONFIG
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
     response_stub = stub!
     stub_writer do |writer|
@@ -116,9 +110,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
             dataset_id: 'yourdataset_id',
             table_id: 'foo',
          },
-          schema: {
-            fields: schema_fields,
-          },
           write_disposition: "WRITE_APPEND",
           source_format: "NEWLINE_DELIMITED_JSON",
           ignore_unknown_values: false,
@@ -138,7 +129,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
   def test_write_with_retryable_error
     driver = create_driver
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
@@ -158,9 +148,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
             dataset_id: 'yourdataset_id',
             table_id: 'foo',
           },
-          schema: {
-            fields: schema_fields,
-          },
           write_disposition: "WRITE_APPEND",
           source_format: "NEWLINE_DELIMITED_JSON",
           ignore_unknown_values: false,
@@ -225,7 +212,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
       utc
     </secondary>
     CONFIG
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
@@ -245,9 +231,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
             dataset_id: 'yourdataset_id',
             table_id: 'foo',
           },
-          schema: {
-            fields: schema_fields,
-          },
           write_disposition: "WRITE_APPEND",
           source_format: "NEWLINE_DELIMITED_JSON",
           ignore_unknown_values: false,
@@ -289,6 +272,61 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
     driver.instance_shutdown
   end
 
+  def test_write_with_auto_create_table
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      <buffer>
+        @type memory
+      </buffer>
+
+      <inject>
+      time_format %s
+      time_key  time
+      </inject>
+
+      auto_create_table true
+      schema_path #{SCHEMA_PATH}
+    CONFIG
+
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+    stub_writer do |writer|
+      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') do
+        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+      end
+
+      mock(writer.client).insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON",
+            ignore_unknown_values: false,
+            max_bad_records: 0,
+            schema: {
+              fields: schema_fields,
+            },
+          }
+        }
+      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+        stub!.job_reference.stub!.job_id { "dummy_job_id" }
+      end
+    end
+
+    driver.run do
+      driver.feed("tag", Time.now.to_i, {"a" => "b"})
+    end
+  end
+
   private
 
   def create_response_stub(response)
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 2.0.0
+  version: 2.1.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-09-05 00:00:00.000000000 Z
+date: 2018-11-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake