fluent-plugin-bigquery 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +24 -25
- data/lib/fluent/plugin/bigquery/errors.rb +6 -10
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +57 -37
- data/lib/fluent/plugin/out_bigquery_base.rb +3 -1
- data/lib/fluent/plugin/out_bigquery_insert.rb +1 -7
- data/lib/fluent/plugin/out_bigquery_load.rb +1 -0
- data/test/plugin/test_out_bigquery_insert.rb +43 -9
- data/test/plugin/test_out_bigquery_load.rb +56 -18
- metadata +2 -2
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b463f412345eb71d1b263bf56e0cd51ebe1c2dacffaa223293edb8d4e5776e73
+  data.tar.gz: f5f7766b2d0f4498239389ef38eb29ef9d20dbe9b118890e8d651b23330d33ca
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8d3851b83d9cbc7c802836dc5f5709d2f92009f980a3a6d3566730eea55fdaf697540c0370220441ed1d88687c27eb8677506e9897693469ef4fcb347d1e7825
+  data.tar.gz: 39223f99503c53a812549b4ff8de2a94c3b7db670e6dd9819840d86d561fe68c922f82c18f0201abe8625b2dbf79d0741413d21c56d9d0855b1889b68946a2f8
data/README.md
CHANGED

@@ -1,7 +1,5 @@
 # fluent-plugin-bigquery
 
-**This README is for v2.0.0.beta**
-
 [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.
 
 - **Plugin type**: Output
@@ -39,29 +37,30 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 
 #### common
 
-| name
-|
-| auth_method
-| email
-| private_key_path
-| private_key_passphrase
-| json_key
-| project
-| dataset
-| table
-| tables
-| auto_create_table
-| ignore_unknown_values
-| schema
-| schema_path
-| fetch_schema
-| fetch_schema_table
-| schema_cache_expire
-| request_timeout_sec
-| request_open_timeout_sec
-| time_partitioning_type
-| time_partitioning_field
-| time_partitioning_expiration
+| name | type | required? | placeholder? | default | description |
+| :-------------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+| email | string | yes (private_key) | no | nil | GCP Service Account Email |
+| private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
+| private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
+| json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
+| project | string | yes | yes | nil | |
+| dataset | string | yes | yes | nil | |
+| table | string | yes (either `tables`) | yes | nil | |
+| tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
+| auto_create_table | bool | no | no | false | If true, creates table automatically |
+| ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+| schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
+| schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
+| fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
+| fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
+| schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
+| request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
+| request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
+| time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
+| time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition(experimental feature on BigQuery). |
+| time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+| time_partitioning_require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. (experimental feature on BigQuery) |
 
 #### bigquery_insert
 
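Taken together, the retabulated options read as an ordinary Fluentd config. Below is a minimal sketch of a `bigquery_insert` match block exercising the new `time_partitioning_require_partition_filter` option; every value (project, dataset, table, key path, schema path) is a placeholder for illustration, not something taken from this diff:

```
<match dummy>
  @type bigquery_insert

  auth_method private_key
  email xxx@developer.gserviceaccount.com
  private_key_path /path/to/key.p12

  project yourproject_id
  dataset yourdataset_id
  table   tablename

  auto_create_table true
  schema_path /path/to/schema.json

  time_partitioning_type day
  time_partitioning_field time
  time_partitioning_expiration 1h
  time_partitioning_require_partition_filter true
</match>
```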
data/lib/fluent/plugin/bigquery/errors.rb
CHANGED

@@ -7,10 +7,9 @@ module Fluent
       RETRYABLE_STATUS_CODE = [500, 502, 503, 504]
 
       class << self
-
-
-
-
+        # @param e [Google::Apis::Error]
+        # @param message [String]
+        def wrap(e, message = nil)
           if retryable_error?(e)
             RetryableError.new(message, e)
           else
@@ -18,12 +17,9 @@ module Fluent
           end
         end
 
-
-
-
-
-          retryable_error_reason?(reason) ||
-            (e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code))
+        # @param e [Google::Apis::Error]
+        def retryable_error?(e)
+          e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code)
         end
 
         def retryable_error_reason?(reason)
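Net effect of the errors.rb change: `wrap` now classifies purely by exception class and HTTP status, dropping the reason-string path from the retry decision (the `retryable_error_reason?` helper itself survives). A small sketch of the resulting behavior; the error constructor keyword matches the one used in the test diffs below:

```ruby
server_err = Google::Apis::ServerError.new("backendError", status_code: 503)
client_err = Google::Apis::ClientError.new("notFound: Not found: Table ...", status_code: 404)

Fluent::BigQuery::Error.wrap(server_err).retryable? # => true: ServerError with status in [500, 502, 503, 504]
Fluent::BigQuery::Error.wrap(client_err).retryable? # => false: wrapped as UnRetryableError
```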
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED

@@ -34,13 +34,7 @@ module Fluent
          }
        }
 
-        if
-          definition[:time_partitioning] = {
-            type: @options[:time_partitioning_type].to_s.upcase,
-            field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
-            expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil
-          }.select { |_, value| !value.nil? }
-        end
+        definition.merge!(time_partitioning: time_partitioning) if time_partitioning
         client.insert_table(project, dataset, definition, {})
         log.debug "create table", project_id: project, dataset: dataset, table: table_id
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -51,10 +45,9 @@ module Fluent
           return
         end
 
-
-        log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message, reason: reason
+        log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message
 
-        if
+        if create_table_retry_count < create_table_retry_limit
           sleep create_table_retry_wait
           create_table_retry_wait *= 2
           create_table_retry_count += 1
@@ -77,14 +70,19 @@ module Fluent
         nil
       end
 
-      def insert_rows(project, dataset, table_id, rows, template_suffix: nil)
+      def insert_rows(project, dataset, table_id, rows, schema, template_suffix: nil)
         body = {
           rows: rows,
           skip_invalid_rows: @options[:skip_invalid_rows],
           ignore_unknown_values: @options[:ignore_unknown_values],
         }
         body.merge!(template_suffix: template_suffix) if template_suffix
-
+
+        if @options[:auto_create_table]
+          res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+        else
+          res = client.insert_all_table_data(project, dataset, table_id, body, {})
+        end
         log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
 
         if res.insert_errors && !res.insert_errors.empty?
@@ -101,8 +99,7 @@ module Fluent
           end
         end
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-
-        error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason }
+        error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message }
         wrapped = Fluent::BigQuery::Error.wrap(e)
         if wrapped.retryable?
           log.warn "tabledata.insertAll API", error_data
@@ -132,9 +129,6 @@ module Fluent
               dataset_id: dataset,
               table_id: table_id,
             },
-            schema: {
-              fields: fields.to_a,
-            },
             write_disposition: "WRITE_APPEND",
             source_format: source_format,
             ignore_unknown_values: @options[:ignore_unknown_values],
@@ -144,17 +138,18 @@ module Fluent
         }
 
         job_id = create_job_id(chunk_id_hex, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
-        configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
         configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
 
-        # If target table is already exist, omit schema configuration.
-        # Because schema changing is easier.
         begin
-
-
+          # Check table existance
+          client.get_table(project, dataset, table_id)
+        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+          if e.status_code == 404 && /Not Found: Table/i =~ e.message
+            raise Fluent::BigQuery::UnRetryableError.new("Table is not found") unless @options[:auto_create_table]
+            raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
+            configuration[:configuration][:load].merge!(schema: {fields: fields.to_a})
+            configuration[:configuration][:load].merge!(time_partitioning: time_partitioning) if time_partitioning
           end
-        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError
-          raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
         end
 
         res = client.insert_job(
@@ -167,19 +162,7 @@ module Fluent
         )
         JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-
-        log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
-
-        if @options[:auto_create_table] && e.status_code == 404 && /Not Found: Table/i =~ e.message
-          # Table Not Found: Auto Create Table
-          create_table(
-            project,
-            dataset,
-            table_id,
-            fields,
-          )
-          raise "table created. send rows next time."
-        end
+        log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message
 
         if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
           return JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, job_id)
@@ -317,6 +300,43 @@ module Fluent
           "NEWLINE_DELIMITED_JSON"
         end
       end
+
+      def time_partitioning
+        return @time_partitioning if instance_variable_defined?(:@time_partitioning)
+
+        if @options[:time_partitioning_type]
+          @time_partitioning = {
+            type: @options[:time_partitioning_type].to_s.upcase,
+            field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
+            expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil,
+            require_partition_filter: @options[:time_partitioning_require_partition_filter],
+          }.reject { |_, v| v.nil? }
+        else
+          @time_partitioning
+        end
+      end
+
+      def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+        try_count ||= 1
+        res = client.insert_all_table_data(project, dataset, table_id, body, {})
+      rescue Google::Apis::ClientError => e
+        if e.status_code == 404 && /Not Found: Table/i =~ e.message
+          if try_count == 1
+            # Table Not Found: Auto Create Table
+            create_table(project, dataset, table_id, schema)
+          elsif try_count > 10
+            raise "A new table was created but it is not found."
+          end
+
+          # Retry to insert several times because the created table is not visible from Streaming insert for a little while
+          # cf. https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
+          try_count += 1
+          sleep 5
+          log.debug "Retry to insert rows", project_id: project, dataset: dataset, table: table_id
+          retry
+        end
+        raise
+      end
     end
   end
 end
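To make the writer refactoring concrete: the new memoized `time_partitioning` helper converts the plugin options into the hash BigQuery expects, and returns nil when partitioning is not configured, so `create_table`, streaming inserts, and `create_load_job` share one code path. A worked example with the option values used by the partitioning test further down:

```ruby
# Options: time_partitioning_type day, time_partitioning_field time,
# time_partitioning_expiration 1h (3600 s), require_partition_filter true.
# time_partitioning then evaluates to:
{
  type: "DAY",               # @options[:time_partitioning_type].to_s.upcase
  field: "time",
  expiration_ms: 3_600_000,  # 3600 * 1000
  require_partition_filter: true,
}
# With time_partitioning_type unset the helper returns nil, and the
# :time_partitioning key is omitted from table definitions and load jobs alike.
```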
data/lib/fluent/plugin/out_bigquery_base.rb
CHANGED

@@ -69,6 +69,7 @@ module Fluent
       config_param :time_partitioning_type, :enum, list: [:day], default: nil
       config_param :time_partitioning_field, :string, default: nil
       config_param :time_partitioning_expiration, :time, default: nil
+      config_param :time_partitioning_require_partition_filter, :bool, default: false
 
       ## Formatter
       config_section :format do
@@ -139,8 +140,9 @@ module Fluent
           prevent_duplicate_load: @prevent_duplicate_load,
           auto_create_table: @auto_create_table,
           time_partitioning_type: @time_partitioning_type,
-          time_partitioning_field: time_partitioning_field,
+          time_partitioning_field: @time_partitioning_field,
           time_partitioning_expiration: @time_partitioning_expiration,
+          time_partitioning_require_partition_filter: @time_partitioning_require_partition_filter,
           timeout_sec: @request_timeout_sec,
           open_timeout_sec: @request_open_timeout_sec,
         })
data/lib/fluent/plugin/out_bigquery_insert.rb
CHANGED

@@ -96,14 +96,8 @@ module Fluent
       end
 
       def insert(project, dataset, table_id, rows, schema, template_suffix)
-        writer.insert_rows(project, dataset, table_id, rows, template_suffix: template_suffix)
+        writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
       rescue Fluent::BigQuery::Error => e
-        if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
-          # Table Not Found: Auto Create Table
-          writer.create_table(project, dataset, table_id, schema)
-          raise "table created. send rows next time."
-        end
-
         raise if e.retryable?
 
         if @secondary
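In short, the auto-create fallback moved out of the output plugin and into the writer: `insert` no longer rescues the 404 itself, it just threads the schema through. A sketch of the behavioral change, method names as in this diff:

```ruby
# 2.0.0: on 404 the plugin created the table, then raised
#        "table created. send rows next time." so the chunk was retried later.
# 2.1.0: the writer receives the schema; with auto_create_table it creates the
#        table and retries the same insert until the table becomes visible.
writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
```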
data/test/plugin/test_out_bigquery_insert.rb
CHANGED

@@ -121,7 +121,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     driver = create_driver
 
     stub_writer do |writer|
-      mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: hash_including(entry)}], template_suffix: nil)
       mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
         rows: [{json: hash_including(entry)}],
         skip_invalid_rows: false,
@@ -346,10 +345,24 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     CONFIG
 
     stub_writer do |writer|
-
-
-
-
+      body = {
+        rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false,
+      }
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+      end.at_least(1)
+      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+        table_reference: {
+          table_id: 'foo',
+        },
+        schema: {
+          fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+        },
+      }, {})
     end
 
     assert_raise(RuntimeError) do
@@ -403,13 +416,34 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       time_partitioning_type day
       time_partitioning_field time
       time_partitioning_expiration 1h
+      time_partitioning_require_partition_filter true
     CONFIG
 
     stub_writer do |writer|
-
-
-
-
+      body = {
+        rows: [message],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false,
+      }
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+      end.at_least(1)
+      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+        table_reference: {
+          table_id: 'foo',
+        },
+        schema: {
+          fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+        },
+        time_partitioning: {
+          type: 'DAY',
+          field: 'time',
+          expiration_ms: 3600000,
+          require_partition_filter: true
+        },
+      }, {})
     end
 
     assert_raise(RuntimeError) do
data/test/plugin/test_out_bigquery_load.rb
CHANGED

@@ -39,10 +39,8 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
       writer
     end
   end
-
-  def test_write
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
+  def test_write
     response_stub = stub!
 
     driver = create_driver
@@ -60,9 +58,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
             dataset_id: 'yourdataset_id',
             table_id: 'foo',
           },
-          schema: {
-            fields: schema_fields,
-          },
           write_disposition: "WRITE_APPEND",
           source_format: "NEWLINE_DELIMITED_JSON",
           ignore_unknown_values: false,
@@ -99,7 +94,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
       schema_path #{SCHEMA_PATH}
      prevent_duplicate_load true
     CONFIG
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
     response_stub = stub!
     stub_writer do |writer|
@@ -116,9 +110,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
             dataset_id: 'yourdataset_id',
             table_id: 'foo',
           },
-          schema: {
-            fields: schema_fields,
-          },
           write_disposition: "WRITE_APPEND",
           source_format: "NEWLINE_DELIMITED_JSON",
           ignore_unknown_values: false,
@@ -138,7 +129,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
   def test_write_with_retryable_error
     driver = create_driver
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
@@ -158,9 +148,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
             dataset_id: 'yourdataset_id',
             table_id: 'foo',
           },
-          schema: {
-            fields: schema_fields,
-          },
           write_disposition: "WRITE_APPEND",
           source_format: "NEWLINE_DELIMITED_JSON",
           ignore_unknown_values: false,
@@ -225,7 +212,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
       utc
     </secondary>
     CONFIG
-    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
@@ -245,9 +231,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
             dataset_id: 'yourdataset_id',
             table_id: 'foo',
           },
-          schema: {
-            fields: schema_fields,
-          },
           write_disposition: "WRITE_APPEND",
           source_format: "NEWLINE_DELIMITED_JSON",
           ignore_unknown_values: false,
@@ -289,6 +272,61 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
     driver.instance_shutdown
   end
 
+  def test_write_with_auto_create_table
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      <buffer>
+        @type memory
+      </buffer>
+
+      <inject>
+        time_format %s
+        time_key time
+      </inject>
+
+      auto_create_table true
+      schema_path #{SCHEMA_PATH}
+    CONFIG
+
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+    stub_writer do |writer|
+      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') do
+        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+      end
+
+      mock(writer.client).insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON",
+            ignore_unknown_values: false,
+            max_bad_records: 0,
+            schema: {
+              fields: schema_fields,
+            },
+          }
+        }
+      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+        stub!.job_reference.stub!.job_id { "dummy_job_id" }
+      end
+    end
+
+    driver.run do
+      driver.feed("tag", Time.now.to_i, {"a" => "b"})
+    end
+  end
+
   private
 
   def create_response_stub(response)
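The load tests encode the new contract for load jobs: `get_table` is consulted first, and only on a 404 (with `auto_create_table true`) does the job configuration gain `schema` and `time_partitioning`; if the table already exists, the schema is omitted so the table's own definition wins. Condensed from `create_load_job` in this diff:

```ruby
begin
  client.get_table(project, dataset, table_id)  # table exists: send no schema
rescue Google::Apis::ClientError => e
  if e.status_code == 404 && /Not Found: Table/i =~ e.message
    raise Fluent::BigQuery::UnRetryableError.new("Table is not found") unless @options[:auto_create_table]
    raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
    configuration[:configuration][:load].merge!(schema: {fields: fields.to_a})
  end
end
```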
metadata
CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 2.0.0
+  version: 2.1.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-
+date: 2018-11-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake