fluent-plugin-bigquery 2.0.0 → 2.1.0
- checksums.yaml +4 -4
- data/README.md +24 -25
- data/lib/fluent/plugin/bigquery/errors.rb +6 -10
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +57 -37
- data/lib/fluent/plugin/out_bigquery_base.rb +3 -1
- data/lib/fluent/plugin/out_bigquery_insert.rb +1 -7
- data/lib/fluent/plugin/out_bigquery_load.rb +1 -0
- data/test/plugin/test_out_bigquery_insert.rb +43 -9
- data/test/plugin/test_out_bigquery_load.rb +56 -18
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b463f412345eb71d1b263bf56e0cd51ebe1c2dacffaa223293edb8d4e5776e73
+  data.tar.gz: f5f7766b2d0f4498239389ef38eb29ef9d20dbe9b118890e8d651b23330d33ca
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8d3851b83d9cbc7c802836dc5f5709d2f92009f980a3a6d3566730eea55fdaf697540c0370220441ed1d88687c27eb8677506e9897693469ef4fcb347d1e7825
+  data.tar.gz: 39223f99503c53a812549b4ff8de2a94c3b7db670e6dd9819840d86d561fe68c922f82c18f0201abe8625b2dbf79d0741413d21c56d9d0855b1889b68946a2f8
data/README.md
CHANGED
@@ -1,7 +1,5 @@
 # fluent-plugin-bigquery
 
-**This README is for v2.0.0.beta**
-
 [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.
 
 - **Plugin type**: Output
@@ -39,29 +37,30 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 
 #### common
 
-| name
-|
-| auth_method
-| email
-| private_key_path
-| private_key_passphrase
-| json_key
-| project
-| dataset
-| table
-| tables
-| auto_create_table
-| ignore_unknown_values
-| schema
-| schema_path
-| fetch_schema
-| fetch_schema_table
-| schema_cache_expire
-| request_timeout_sec
-| request_open_timeout_sec
-| time_partitioning_type
-| time_partitioning_field
-| time_partitioning_expiration
+| name | type | required? | placeholder? | default | description |
+| :-------------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+| email | string | yes (private_key) | no | nil | GCP Service Account Email |
+| private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
+| private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
+| json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
+| project | string | yes | yes | nil | |
+| dataset | string | yes | yes | nil | |
+| table | string | yes (either `tables`) | yes | nil | |
+| tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
+| auto_create_table | bool | no | no | false | If true, creates table automatically |
+| ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+| schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
+| schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
+| fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
+| fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
+| schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
+| request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
+| request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
+| time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
+| time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition(experimental feature on BigQuery). |
+| time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+| time_partitioning_require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. (experimental feature on BigQuery) |
 
 #### bigquery_insert
 
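To make the new parameter table concrete, here is a minimal `bigquery_insert` configuration sketch exercising the partitioning options added in this release. The project, dataset, table, and key paths are placeholders for illustration, not values taken from the diff; see the full README for the authoritative examples.

<match dummy>
  @type bigquery_insert

  auth_method private_key            # or json_key / compute_engine / application_default
  email foo@bar.example              # placeholder service account
  private_key_path /path/to/key      # placeholder key path

  project yourproject_id
  dataset yourdataset_id
  table foo

  auto_create_table true
  schema_path /path/to/schema.json   # placeholder schema definition (JSON)

  time_partitioning_type day
  time_partitioning_field time
  time_partitioning_expiration 1h
  time_partitioning_require_partition_filter true
</match>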
data/lib/fluent/plugin/bigquery/errors.rb
CHANGED
@@ -7,10 +7,9 @@ module Fluent
       RETRYABLE_STATUS_CODE = [500, 502, 503, 504]
 
       class << self
-
-
-
-
+        # @param e [Google::Apis::Error]
+        # @param message [String]
+        def wrap(e, message = nil)
           if retryable_error?(e)
             RetryableError.new(message, e)
           else
@@ -18,12 +17,9 @@ module Fluent
           end
         end
 
-
-
-
-
-          retryable_error_reason?(reason) ||
-            (e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code))
+        # @param e [Google::Apis::Error]
+        def retryable_error?(e)
+          e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code)
         end
 
         def retryable_error_reason?(reason)
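As a quick illustration of the simplified classification above (a sketch, not part of the diff; it assumes the plugin and google-api-client are already loaded): only a `Google::Apis::ServerError` whose status code is in `RETRYABLE_STATUS_CODE` is now wrapped as retryable.

# Error construction mirrors the test code later in this changeset.
server_err = Google::Apis::ServerError.new("backendError", status_code: 503)
client_err = Google::Apis::ClientError.new("notFound: Not found: Table", status_code: 404)

Fluent::BigQuery::Error.wrap(server_err).retryable? # => true,  503 is in RETRYABLE_STATUS_CODE
Fluent::BigQuery::Error.wrap(client_err).retryable? # => false, client errors are no longer treated as retryable here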
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED
@@ -34,13 +34,7 @@ module Fluent
           }
         }
 
-        if
-          definition[:time_partitioning] = {
-            type: @options[:time_partitioning_type].to_s.upcase,
-            field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
-            expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil
-          }.select { |_, value| !value.nil? }
-        end
+        definition.merge!(time_partitioning: time_partitioning) if time_partitioning
         client.insert_table(project, dataset, definition, {})
         log.debug "create table", project_id: project, dataset: dataset, table: table_id
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -51,10 +45,9 @@ module Fluent
           return
         end
 
-
-        log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message, reason: reason
+        log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message
 
-        if
+        if create_table_retry_count < create_table_retry_limit
           sleep create_table_retry_wait
           create_table_retry_wait *= 2
           create_table_retry_count += 1
@@ -77,14 +70,19 @@ module Fluent
         nil
       end
 
-      def insert_rows(project, dataset, table_id, rows, template_suffix: nil)
+      def insert_rows(project, dataset, table_id, rows, schema, template_suffix: nil)
         body = {
           rows: rows,
           skip_invalid_rows: @options[:skip_invalid_rows],
           ignore_unknown_values: @options[:ignore_unknown_values],
         }
         body.merge!(template_suffix: template_suffix) if template_suffix
-
+
+        if @options[:auto_create_table]
+          res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+        else
+          res = client.insert_all_table_data(project, dataset, table_id, body, {})
+        end
         log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
 
         if res.insert_errors && !res.insert_errors.empty?
@@ -101,8 +99,7 @@ module Fluent
           end
         end
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-
-        error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason }
+        error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message }
         wrapped = Fluent::BigQuery::Error.wrap(e)
         if wrapped.retryable?
           log.warn "tabledata.insertAll API", error_data
@@ -132,9 +129,6 @@ module Fluent
               dataset_id: dataset,
               table_id: table_id,
             },
-            schema: {
-              fields: fields.to_a,
-            },
             write_disposition: "WRITE_APPEND",
             source_format: source_format,
             ignore_unknown_values: @options[:ignore_unknown_values],
@@ -144,17 +138,18 @@ module Fluent
        }
 
        job_id = create_job_id(chunk_id_hex, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
-        configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
        configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
 
-        # If target table is already exist, omit schema configuration.
-        # Because schema changing is easier.
        begin
-
-
+          # Check table existance
+          client.get_table(project, dataset, table_id)
+        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+          if e.status_code == 404 && /Not Found: Table/i =~ e.message
+            raise Fluent::BigQuery::UnRetryableError.new("Table is not found") unless @options[:auto_create_table]
+            raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
+            configuration[:configuration][:load].merge!(schema: {fields: fields.to_a})
+            configuration[:configuration][:load].merge!(time_partitioning: time_partitioning) if time_partitioning
          end
-      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError
-        raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
        end
 
        res = client.insert_job(
@@ -167,19 +162,7 @@ module Fluent
        )
        JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-
-        log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
-
-        if @options[:auto_create_table] && e.status_code == 404 && /Not Found: Table/i =~ e.message
-          # Table Not Found: Auto Create Table
-          create_table(
-            project,
-            dataset,
-            table_id,
-            fields,
-          )
-          raise "table created. send rows next time."
-        end
+        log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message
 
        if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
          return JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, job_id)
@@ -317,6 +300,43 @@ module Fluent
          "NEWLINE_DELIMITED_JSON"
        end
      end
+
+      def time_partitioning
+        return @time_partitioning if instance_variable_defined?(:@time_partitioning)
+
+        if @options[:time_partitioning_type]
+          @time_partitioning = {
+            type: @options[:time_partitioning_type].to_s.upcase,
+            field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
+            expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil,
+            require_partition_filter: @options[:time_partitioning_require_partition_filter],
+          }.reject { |_, v| v.nil? }
+        else
+          @time_partitioning
+        end
+      end
+
+      def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+        try_count ||= 1
+        res = client.insert_all_table_data(project, dataset, table_id, body, {})
+      rescue Google::Apis::ClientError => e
+        if e.status_code == 404 && /Not Found: Table/i =~ e.message
+          if try_count == 1
+            # Table Not Found: Auto Create Table
+            create_table(project, dataset, table_id, schema)
+          elsif try_count > 10
+            raise "A new table was created but it is not found."
+          end
+
+          # Retry to insert several times because the created table is not visible from Streaming insert for a little while
+          # cf. https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
+          try_count += 1
+          sleep 5
+          log.debug "Retry to insert rows", project_id: project, dataset: dataset, table: table_id
+          retry
+        end
+        raise
+      end
    end
  end
end
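For context, a hedged sketch of what the new `time_partitioning` helper evaluates to inside a writer whose `@options` carry typical values (the keys mirror what `out_bigquery_base.rb` passes; the resulting hash matches the `insert_table` expectation in the updated insert test below):

# Hypothetical options for illustration only.
@options = {
  time_partitioning_type: :day,
  time_partitioning_field: "time",
  time_partitioning_expiration: 3600,   # seconds; multiplied by 1000 into expiration_ms
  time_partitioning_require_partition_filter: true,
}

time_partitioning
# => {type: "DAY", field: "time", expiration_ms: 3600000, require_partition_filter: true}
# When time_partitioning_type is unset the helper returns nil, and both create_table and
# the load job configuration simply omit the time_partitioning key.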
data/lib/fluent/plugin/out_bigquery_base.rb
CHANGED
@@ -69,6 +69,7 @@ module Fluent
     config_param :time_partitioning_type, :enum, list: [:day], default: nil
     config_param :time_partitioning_field, :string, default: nil
     config_param :time_partitioning_expiration, :time, default: nil
+    config_param :time_partitioning_require_partition_filter, :bool, default: false
 
     ## Formatter
     config_section :format do
@@ -139,8 +140,9 @@ module Fluent
         prevent_duplicate_load: @prevent_duplicate_load,
         auto_create_table: @auto_create_table,
         time_partitioning_type: @time_partitioning_type,
-        time_partitioning_field: time_partitioning_field,
+        time_partitioning_field: @time_partitioning_field,
         time_partitioning_expiration: @time_partitioning_expiration,
+        time_partitioning_require_partition_filter: @time_partitioning_require_partition_filter,
         timeout_sec: @request_timeout_sec,
         open_timeout_sec: @request_open_timeout_sec,
       })
data/lib/fluent/plugin/out_bigquery_insert.rb
CHANGED
@@ -96,14 +96,8 @@ module Fluent
      end
 
      def insert(project, dataset, table_id, rows, schema, template_suffix)
-        writer.insert_rows(project, dataset, table_id, rows, template_suffix: template_suffix)
+        writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
      rescue Fluent::BigQuery::Error => e
-        if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
-          # Table Not Found: Auto Create Table
-          writer.create_table(project, dataset, table_id, schema)
-          raise "table created. send rows next time."
-        end
-
        raise if e.retryable?
 
        if @secondary
data/test/plugin/test_out_bigquery_insert.rb
CHANGED
@@ -121,7 +121,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
    driver = create_driver
 
    stub_writer do |writer|
-      mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: hash_including(entry)}], template_suffix: nil)
      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
@@ -346,10 +345,24 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
    CONFIG
 
    stub_writer do |writer|
-
-
-
-
+      body = {
+        rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false,
+      }
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+      end.at_least(1)
+      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+        table_reference: {
+          table_id: 'foo',
+        },
+        schema: {
+          fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+        },
+      }, {})
    end
 
    assert_raise(RuntimeError) do
@@ -403,13 +416,34 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      time_partitioning_type day
      time_partitioning_field time
      time_partitioning_expiration 1h
+      time_partitioning_require_partition_filter true
    CONFIG
 
    stub_writer do |writer|
-
-
-
-
+      body = {
+        rows: [message],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false,
+      }
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+      end.at_least(1)
+      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+        table_reference: {
+          table_id: 'foo',
+        },
+        schema: {
+          fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+        },
+        time_partitioning: {
+          type: 'DAY',
+          field: 'time',
+          expiration_ms: 3600000,
+          require_partition_filter: true
+        },
+      }, {})
    end
 
    assert_raise(RuntimeError) do
data/test/plugin/test_out_bigquery_load.rb
CHANGED
@@ -39,10 +39,8 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
      writer
    end
  end
-
-  def test_write
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
+  def test_write
    response_stub = stub!
 
    driver = create_driver
@@ -60,9 +58,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
            dataset_id: 'yourdataset_id',
            table_id: 'foo',
          },
-          schema: {
-            fields: schema_fields,
-          },
          write_disposition: "WRITE_APPEND",
          source_format: "NEWLINE_DELIMITED_JSON",
          ignore_unknown_values: false,
@@ -99,7 +94,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
      schema_path #{SCHEMA_PATH}
      prevent_duplicate_load true
    CONFIG
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
    response_stub = stub!
    stub_writer do |writer|
@@ -116,9 +110,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
            dataset_id: 'yourdataset_id',
            table_id: 'foo',
          },
-          schema: {
-            fields: schema_fields,
-          },
          write_disposition: "WRITE_APPEND",
          source_format: "NEWLINE_DELIMITED_JSON",
          ignore_unknown_values: false,
@@ -138,7 +129,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
  def test_write_with_retryable_error
    driver = create_driver
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
@@ -158,9 +148,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
            dataset_id: 'yourdataset_id',
            table_id: 'foo',
          },
-          schema: {
-            fields: schema_fields,
-          },
          write_disposition: "WRITE_APPEND",
          source_format: "NEWLINE_DELIMITED_JSON",
          ignore_unknown_values: false,
@@ -225,7 +212,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
        utc
      </secondary>
    CONFIG
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
@@ -245,9 +231,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
            dataset_id: 'yourdataset_id',
            table_id: 'foo',
          },
-          schema: {
-            fields: schema_fields,
-          },
          write_disposition: "WRITE_APPEND",
          source_format: "NEWLINE_DELIMITED_JSON",
          ignore_unknown_values: false,
@@ -289,6 +272,61 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
    driver.instance_shutdown
  end
 
+  def test_write_with_auto_create_table
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      <buffer>
+        @type memory
+      </buffer>
+
+      <inject>
+        time_format %s
+        time_key time
+      </inject>
+
+      auto_create_table true
+      schema_path #{SCHEMA_PATH}
+    CONFIG
+
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+    stub_writer do |writer|
+      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') do
+        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+      end
+
+      mock(writer.client).insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON",
+            ignore_unknown_values: false,
+            max_bad_records: 0,
+            schema: {
+              fields: schema_fields,
+            },
+          }
+        }
+      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+        stub!.job_reference.stub!.job_id { "dummy_job_id" }
+      end
+    end
+
+    driver.run do
+      driver.feed("tag", Time.now.to_i, {"a" => "b"})
+    end
+  end
+
  private
 
  def create_response_stub(response)
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 2.
+  version: 2.1.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-
+date: 2018-11-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake