fluent-plugin-bigquery 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +16 -0
- data/README.md +57 -37
- data/fluent-plugin-bigquery.gemspec +1 -1
- data/lib/fluent/plugin/bigquery/errors.rb +1 -1
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +18 -8
- data/lib/fluent/plugin/out_bigquery.rb +42 -69
- data/test/plugin/test_out_bigquery.rb +220 -287
- metadata +6 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 28048ac73908138fd792fce4513a880702cbcba1
+  data.tar.gz: fdf18e7653e8da3fe3d9d736ec3ab053fcc18e67
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 314b093ee2bd215a297a9e32d20a66f831dab816d24bbc9175c38be63b34f6a2433d66206c3962d354ddf256417c0b33300d6cbb9c4b2fe86e9261245ccbebb9
+  data.tar.gz: a671371a498ceefee7d4f4617b95cddf9b377485d2c1dba669c4ed2c6ec5d4fcde83b10d6f2bc17b72b787068582aee7c7715b190a20f677c97a695d63c2105b
data/.github/ISSUE_TEMPLATE.md
ADDED
@@ -0,0 +1,16 @@
+<!-- Please check your config and docs of fluentd !! -->
+
+## Environments
+
+- fluentd version:
+- plugin version:
+
+## Configuration
+<!-- Please write your configuration -->
+
+## Expected Behavior
+
+## Actual Behavior
+
+## Log (if you have)
+
data/README.md
CHANGED
@@ -31,43 +31,42 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 
 ### Options
 
-| name | type | required? | placeholder? | default
-| :------------------------------------- | :------------ | :----------- | :---------- | :-------------------------
-| method | string | no | no | insert
-| auth_method | enum | yes | no | private_key
-| email | string | yes (private_key) | no | nil
-| private_key_path | string | yes (private_key) | no | nil
-| private_key_passphrase | string | yes (private_key) | no | nil
-| json_key | string | yes (json_key) | no | nil
-| project | string | yes | yes | nil
-| dataset | string | yes | yes | nil
-| table | string | yes (either `tables`) | yes | nil
-| tables | array(string) | yes (either `table`) | yes | nil
-| template_suffix | string | no | yes | nil
-| auto_create_table | bool | no | no | false
-| skip_invalid_rows | bool | no | no | false
-| max_bad_records | integer | no | no | 0
-| ignore_unknown_values | bool | no | no | false
-| schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil
-| schema_path | string | yes (either `fetch_schema`) | no | nil
-| fetch_schema | bool | yes (either `schema_path`) | no | false
-| fetch_schema_table | string | no | yes | nil
-| schema_cache_expire | integer | no | no | 600
-| time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+| name | type | required? | placeholder? | default | description |
+| :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+| method | string | no | no | insert | `insert` (Streaming Insert) or `load` (load job) |
+| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+| email | string | yes (private_key) | no | nil | GCP Service Account Email |
+| private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
+| private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
+| json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
+| project | string | yes | yes | nil | |
+| dataset | string | yes | yes | nil | |
+| table | string | yes (either `tables`) | yes | nil | |
+| tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
+| template_suffix | string | no | yes | nil | can use `%{time_slice}` placeholder replaced by `time_slice_format` |
+| auto_create_table | bool | no | no | false | If true, creates table automatically |
+| skip_invalid_rows | bool | no | no | false | Only `insert` method. |
+| max_bad_records | integer | no | no | 0 | Only `load` method. If the number of bad records exceeds this value, an invalid error is returned in the job result. |
+| ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+| schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
+| schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
+| fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
+| fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
+| schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
+| insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. |
+| add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
+| allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+| request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
+| request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
+| time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
+| time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+
+### Deprecated
+
+| name | type | required? | placeholder? | default | description |
+| :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+| replace_record_key | bool | no | no | false | Use other filter plugin. |
+| replace_record_key_regexp{1-10} | string | no | no | nil | |
 
 ### Buffer section
 
@@ -114,6 +113,27 @@ For example.
 
 see. https://github.com/fluent/fluentd/blob/master/lib/fluent/plugin_helper/inject.rb
 
+### Formatter section
+
+This section is for `load` mode only.
+If you use `insert` mode, used formatter is `json` only.
+
+Bigquery supports `csv`, `json` and `avro` format. Default is `json`
+I recommend to use `json` for now.
+
+For example.
+
+```
+source_format csv
+
+<format>
+  @type csv
+  fields col1, col2, col3
+</format>
+```
+
+see. https://github.com/fluent/fluentd/blob/master/lib/fluent/plugin_helper/formatter.rb
+
 ## Examples
 
 ### Streaming inserts
data/fluent-plugin-bigquery.gemspec
CHANGED
@@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "test-unit"
   spec.add_development_dependency "test-unit-rr"
 
-  spec.add_runtime_dependency "google-api-client", ">= 0.
+  spec.add_runtime_dependency "google-api-client", ">= 0.11.0"
   spec.add_runtime_dependency "googleauth", ">= 0.5.0"
   spec.add_runtime_dependency "multi_json"
   spec.add_runtime_dependency "fluentd", "~> 0.14.0"
data/lib/fluent/plugin/bigquery/errors.rb
CHANGED
@@ -3,7 +3,7 @@ module Fluent
   # @abstract
   class Error < StandardError
     RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
-    RETRYABLE_INSERT_ERRORS_REASON = %w(timeout).freeze
+    RETRYABLE_INSERT_ERRORS_REASON = %w(timeout backendError internalError rateLimitExceeded).freeze
     RETRYABLE_STATUS_CODE = [500, 502, 503, 504]
 
     class << self
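The change above widens `RETRYABLE_INSERT_ERRORS_REASON` so that transient backend failures reported inside `insertErrors` can be retried, not only timeouts. For illustration only (this sketch is not part of the diff, and the method name is hypothetical), a retry gate built on that constant could look like:

```ruby
# Hypothetical sketch: decide whether a streaming-insert response warrants a
# retry by checking each insert error's reason against the retryable list.
def retryable_insert_errors?(insert_errors)
  insert_errors.any? do |insert_error|
    insert_error.errors.any? do |error|
      Fluent::BigQuery::Error::RETRYABLE_INSERT_ERRORS_REASON.include?(error.reason)
    end
  end
end
```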
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED
@@ -16,6 +16,9 @@ module Fluent
 
       client = Google::Apis::BigqueryV2::BigqueryService.new.tap do |cl|
         cl.authorization = get_auth
+        cl.client_options.open_timeout_sec = @options[:open_timeout_sec] if @options[:open_timeout_sec]
+        cl.client_options.read_timeout_sec = @options[:timeout_sec] if @options[:timeout_sec]
+        cl.client_options.send_timeout_sec = @options[:timeout_sec] if @options[:timeout_sec]
       end
 
       @cached_client_expiration = Time.now + 1800
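These assignments move timeout handling onto the API client itself; the per-request `options:` hashes are dropped in the hunks that follow. A minimal standalone sketch of the client-level configuration, assuming the google-api-client gem:

```ruby
require "google/apis/bigquery_v2"

client = Google::Apis::BigqueryV2::BigqueryService.new
# Client-level timeouts apply to every request issued through this client.
client.client_options.open_timeout_sec = 60   # time allowed to open the connection
client.client_options.read_timeout_sec = 120  # time allowed to read the response
client.client_options.send_timeout_sec = 120  # time allowed to send the request
```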
@@ -91,9 +94,7 @@ module Fluent
         ignore_unknown_values: @options[:ignore_unknown_values],
       }
       body.merge!(template_suffix: template_suffix) if template_suffix
-      res = client.insert_all_table_data(project, dataset, table_id, body, {
-        options: {timeout_sec: @options[:timeout_sec], open_timeout_sec: @options[:open_timeout_sec]}
-      })
+      res = client.insert_all_table_data(project, dataset, table_id, body, {})
       log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
 
       if res.insert_errors && !res.insert_errors.empty?
@@ -137,7 +138,7 @@ module Fluent
           fields: fields.to_a,
         },
         write_disposition: "WRITE_APPEND",
-        source_format:
+        source_format: source_format,
         ignore_unknown_values: @options[:ignore_unknown_values],
         max_bad_records: @options[:max_bad_records],
       }
@@ -164,10 +165,6 @@ module Fluent
         {
           upload_source: upload_source,
           content_type: "application/octet-stream",
-          options: {
-            timeout_sec: @options[:timeout_sec],
-            open_timeout_sec: @options[:open_timeout_sec],
-          }
         }
       )
       wait_load_job(chunk_id, project, dataset, res.job_reference.job_id, table_id)
@@ -299,6 +296,19 @@ module Fluent
         @log.debug "job_id_key: #{job_id_key}"
         "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
       end
+
+      def source_format
+        case @options[:source_format]
+        when :json
+          "NEWLINE_DELIMITED_JSON"
+        when :avro
+          "AVRO"
+        when :csv
+          "CSV"
+        else
+          "NEWLINE_DELIMITED_JSON"
+        end
+      end
     end
   end
 end
data/lib/fluent/plugin/out_bigquery.rb
CHANGED
@@ -14,7 +14,7 @@ module Fluent
   class BigQueryOutput < Output
     Fluent::Plugin.register_output('bigquery', self)
 
-    helpers :inject
+    helpers :inject, :formatter
 
     # https://developers.google.com/bigquery/browser-tool-quickstart
     # https://developers.google.com/bigquery/bigquery-api-quickstart
@@ -23,22 +23,33 @@ module Fluent
     def configure_for_insert(conf)
       raise ConfigError unless conf["method"].nil? || conf["method"] == "insert"
 
+      formatter_config = conf.elements("format")[0]
+      if formatter_config && formatter_config['@type'] != "json"
+        log.warn "`insert` mode supports only json formatter."
+        formatter_config['@type'] = nil
+      end
+      @formatter = formatter_create(usage: 'out_bigquery_for_insert', type: 'json', conf: formatter_config)
+
       buffer_config = conf.elements("buffer")[0]
+      if buffer_config
+        buffer_config["@type"] = "memory" unless buffer_config["@type"]
+        buffer_config["flush_mode"] = :interval unless buffer_config["flush_mode"]
+        buffer_config["flush_interval"] = 0.25 unless buffer_config["flush_interval"]
+        buffer_config["flush_thread_interval"] = 0.05 unless buffer_config["flush_thread_interval"]
+        buffer_config["flush_thread_burst_interval"] = 0.05 unless buffer_config["flush_thread_burst_interval"]
+        buffer_config["chunk_limit_size"] = 1 * 1024 ** 2 unless buffer_config["chunk_limit_size"] # 1MB
+        buffer_config["total_limit_size"] = 1 * 1024 ** 3 unless buffer_config["total_limit_size"] # 1GB
+        buffer_config["chunk_records_limit"] = 500 unless buffer_config["chunk_records_limit"]
+      end
     end
 
     ### default for loads
     def configure_for_load(conf)
       raise ConfigError unless conf["method"] == "load"
 
+      formatter_config = conf.elements("format")[0]
+      @formatter = formatter_create(usage: 'out_bigquery_for_load', conf: formatter_config, default_type: 'json')
+
       buffer_config = conf.elements("buffer")[0]
       return unless buffer_config
       buffer_config["@type"] = "file" unless buffer_config["@type"]
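Both configure paths now delegate serialization to Fluentd v0.14's formatter plugin helper instead of hand-rolled formatting. A self-contained sketch of that helper in a custom output, for illustration only (the plugin name and usage label are hypothetical, not part of this gem):

```ruby
require "fluent/plugin/output"

module Fluent::Plugin
  class ExampleOutput < Output            # illustrative plugin
    Fluent::Plugin.register_output("example", self)
    helpers :formatter

    def configure(conf)
      super
      # Picks up an optional <format> section; falls back to the json formatter.
      @formatter = formatter_create(usage: "example", conf: conf.elements("format")[0], default_type: "json")
    end

    def format(tag, time, record)
      # Delegate serialization to whatever formatter the user configured.
      @formatter.format(tag, time, record)
    end
  end
end
```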
@@ -80,6 +91,8 @@ module Fluent
 
     config_param :auto_create_table, :bool, default: false
 
+    config_param :source_format, :enum, list: [:json, :avro, :csv], default: :json
+
     # skip_invalid_rows (only insert)
     # Insert all valid rows of a request, even if invalid rows exist.
     # The default value is false, which causes the entire request to fail if any invalid rows exist.
@@ -99,23 +112,11 @@ module Fluent
     config_param :fetch_schema, :bool, default: false
     config_param :fetch_schema_table, :string, default: nil
     config_param :schema_cache_expire, :time, default: 600
-    config_param :field_string, :array, value_type: :string, default: nil
-    config_param :field_integer, :array, value_type: :string, default: nil
-    config_param :field_float, :array, value_type: :string, default: nil
-    config_param :field_boolean, :array, value_type: :string, default: nil
-    config_param :field_timestamp, :array, value_type: :string, default: nil
-    ### TODO: record field stream inserts doesn't works well?
-    ###   At table creation, table type json + field type record -> field type validation fails
-    ###   At streaming inserts, schema cannot be specified
-    # config_param :field_record, :string, defualt: nil
-    # config_param :optional_data_field, :string, default: nil
 
     REGEXP_MAX_NUM = 10
     config_param :replace_record_key, :bool, default: false
     (1..REGEXP_MAX_NUM).each {|i| config_param :"replace_record_key_regexp#{i}", :string, default: nil }
 
-    config_param :convert_hash_to_json, :bool, default: false
-
     # insert_id_field (only insert)
     config_param :insert_id_field, :string, default: nil
     # prevent_duplicate_load (only load)
@@ -157,6 +158,11 @@ module Fluent
     config_param :time_partitioning_type, :enum, list: [:day], default: nil
     config_param :time_partitioning_expiration, :time, default: nil
 
+    ## Formatter
+    config_section :format do
+      config_set_default :@type, 'json'
+    end
+
     ### Table types
     # https://developers.google.com/bigquery/docs/tables
     #
@@ -215,13 +221,14 @@ module Fluent
         raise Fluent::ConfigError, "unrecognized 'auth_method': #{@auth_method}"
       end
 
+      @writers = {}
+
       unless @table.nil? ^ @tables.nil?
         raise Fluent::ConfigError, "'table' or 'tables' must be specified, and both are invalid"
       end
 
       @tablelist = @tables ? @tables : [@table]
 
-      legacy_schema_config_deprecation
       @table_schema = Fluent::BigQuery::RecordSchema.new('record')
       if @schema
         @table_schema.load_schema(@schema)
@@ -230,14 +237,7 @@ module Fluent
         @table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
       end
 
-      types.each do |type|
-        fields = instance_variable_get("@field_#{type}")
-        next unless fields
-        fields.each do |field|
-          @table_schema.register_field field, type
-        end
-      end
+      warn "[DEPRECATION] `replace_record_key` param is deprecated. Please use filter_record_transformer or fluent-plugin-record-reformer" if @replace_record_key
 
       @regexps = {}
       (1..REGEXP_MAX_NUM).each do |i|
@@ -259,8 +259,6 @@ module Fluent
 
       placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}/template_suffix=#{@template_suffix}"
       placeholder_validate!(:bigquery, placeholder_params)
-
-      warn "[DEPRECATION] `convert_hash_to_json` param is deprecated. If Hash value is inserted string field, plugin convert it to json automatically." if @convert_hash_to_json
     end
 
     def start
@@ -273,10 +271,11 @@ module Fluent
     end
 
     def writer
-      @
+      @writers["thread-#{Thread.current.object_id}"] ||= Fluent::BigQuery::Writer.new(@log, @auth_method, {
         private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
         email: @email,
         json_key: @json_key,
+        source_format: @source_format,
         skip_invalid_rows: @skip_invalid_rows,
         ignore_unknown_values: @ignore_unknown_values,
         max_bad_records: @max_bad_records,
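The writer is now memoized per flush thread rather than once per plugin instance, so concurrent flush threads no longer share a single API client. The underlying pattern, sketched with hypothetical names (not the plugin's actual code):

```ruby
# Hypothetical sketch of per-thread memoization: each flush thread lazily
# builds and then reuses its own writer, keyed by its thread object id.
def writer
  @writers ||= {}
  @writers["thread-#{Thread.current.object_id}"] ||= build_writer # build_writer is illustrative
end
```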
@@ -303,45 +302,25 @@ module Fluent
       new_record
     end
 
-    def convert_hash_to_json(record)
-      record.each do |key, value|
-        if value.class == Hash
-          record[key] = MultiJson.dump(value)
-        end
-      end
-      record
-    end
-
     def format(tag, time, record)
       if @replace_record_key
         record = replace_record_key(record)
       end
 
-      if @convert_hash_to_json
-        record = convert_hash_to_json(record)
-      end
-
       record = inject_values_to_record(tag, time, record)
 
-      end
-    ensure
-      @buffer.metadata_list.delete(meta)
-    end
+      meta = metadata(tag, time, record)
+      schema =
+        if @fetch_schema
+          fetch_schema(meta)
+        else
+          @table_schema
+        end
 
       begin
-        buf = String.new
         row = schema.format(record)
-        end
-        buf
+        return if row.empty?
+        @formatter.format(tag, time, row)
       rescue
         log.error("format error", record: record, schema: schema)
         raise
@@ -357,12 +336,6 @@ module Fluent
       _write(chunk, table_id_format)
     end
 
-    def legacy_schema_config_deprecation
-      if [@field_string, @field_integer, @field_float, @field_boolean, @field_timestamp].any?
-        warn "[DEPRECATION] `field_*` style schema config is deprecated. Instead of it, use `schema` config params that is array of json style."
-      end
-    end
-
     def fetch_schema(metadata)
       table_id = nil
       project = extract_placeholders(@project, metadata)
data/test/plugin/test_out_bigquery.rb
CHANGED
@@ -44,10 +44,12 @@ class BigQueryOutputTest < Test::Unit::TestCase
     Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryOutput).configure(conf)
   end
 
-  def stub_writer(driver)
+  def stub_writer(driver, stub_auth: true)
+    stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
+      stub(writer).get_auth { nil } if stub_auth
+      yield writer
+      writer
+    end
   end
 
   def test_configure_table
@@ -65,37 +67,14 @@ class BigQueryOutputTest < Test::Unit::TestCase
   end
 
   def test_configure_auth_private_key
-    key = stub!
-    mock(Google::APIClient::KeyUtils).load_from_pkcs12('/path/to/key', 'notasecret') { key }
-    authorization = Object.new
-    stub(Signet::OAuth2::Client).new
-    mock(Signet::OAuth2::Client).new(
-      token_credential_uri: "https://accounts.google.com/o/oauth2/token",
-      audience: "https://accounts.google.com/o/oauth2/token",
-      scope: API_SCOPE,
-      issuer: 'foo@bar.example',
-      signing_key: key) { authorization }
-
-    mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
-      mock(cl).__send__(:authorization=, authorization) {}
-      cl
-    end
-
     driver = create_driver
-
+    stub_writer(driver, stub_auth: false) do |writer|
+      mock(writer).get_auth_from_private_key { stub! }
+    end
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
   def test_configure_auth_compute_engine
-    authorization = Object.new
-    mock(Google::Auth::GCECredentials).new { authorization }
-
-    mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
-      mock(cl).__send__(:authorization=, authorization) {}
-      cl
-    end
-
     driver = create_driver(%[
       table foo
       auth_method compute_engine
@@ -107,25 +86,18 @@ class BigQueryOutputTest < Test::Unit::TestCase
         {"name": "bytes", "type": "INTEGER"}
       ]
     ])
-
-    driver
+
+    stub_writer(driver, stub_auth: false) do |writer|
+      mock(writer).get_auth_from_compute_engine { stub! }
+    end
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
   def test_configure_auth_json_key_as_file
-    json_key_path = 'test/plugin/testdata/json_key.json'
-    authorization = Object.new
-    mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: File.open(json_key_path), scope: API_SCOPE) { authorization }
-
-    mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
-      mock(cl).__send__(:authorization=, authorization) {}
-      cl
-    end
-
     driver = create_driver(%[
       table foo
      auth_method json_key
-      json_key
+      json_key jsonkey.josn
       project yourproject_id
       dataset yourdataset_id
       schema [
@@ -134,8 +106,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
         {"name": "bytes", "type": "INTEGER"}
       ]
     ])
-
-    driver
+
+    stub_writer(driver, stub_auth: false) do |writer|
+      mock(writer).get_auth_from_json_key { stub! }
+    end
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
@@ -170,12 +144,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
     json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
     json_key_io = StringIO.new(json_key)
     authorization = Object.new
-
-    mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
-      mock(cl).__send__(:authorization=, authorization) {}
-      cl
-    end
+    stub(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == JSON.parse(json_key_io.read) }, scope: API_SCOPE) { authorization }
 
     driver = create_driver(%[
       table foo
@@ -189,20 +158,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
         {"name": "bytes", "type": "INTEGER"}
       ]
     ])
-
-
+    stub_writer(driver, stub_auth: false) do |writer|
+      mock.proxy(writer).get_auth_from_json_key { stub! }
+    end
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
   def test_configure_auth_application_default
-    authorization = Object.new
-    mock(Google::Auth).get_application_default([API_SCOPE]) { authorization }
-
-    mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
-      mock(cl).__send__(:authorization=, authorization) {}
-      cl
-    end
-
     driver = create_driver(%[
       table foo
       auth_method application_default
@@ -215,8 +177,9 @@ class BigQueryOutputTest < Test::Unit::TestCase
       ]
     ])
 
-
-
+    stub_writer(driver, stub_auth: false) do |writer|
+      mock.proxy(writer).get_auth_from_application_default { stub! }
+    end
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
@@ -282,18 +245,18 @@ class BigQueryOutputTest < Test::Unit::TestCase
   [
     # <time_format>, <time field type>, <time expectation generator>, <assertion>
     [
-      "%s.%6N",
+      "%s.%6N",
       lambda{|t| t.strftime("%s.%6N").to_f },
       lambda{|recv, expected, actual|
        recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
      }
    ],
    [
-      "%Y-%m-%dT%H:%M:%S%:z",
+      "%Y-%m-%dT%H:%M:%S%:z",
      lambda{|t| t.iso8601 },
      :assert_equal.to_proc
    ],
-  ].each do |format,
+  ].each do |format, expect_time, assert|
     define_method("test_time_formats_#{format}") do
       now = Fluent::Engine.now
       input = {}
@@ -311,7 +274,6 @@ class BigQueryOutputTest < Test::Unit::TestCase
         time_type string
         time_key  time
       </inject>
-      #{type} time
 
       schema [
         {"name": "metadata", "type": "RECORD", "fields": [
@@ -479,9 +441,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
       schema [{"name": "time", "type": "INTEGER"}]
     CONFIG
 
-
+    stub_writer(driver) do |writer|
+      mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
+        sudo_schema_response["schema"]["fields"]
+      end
     end
 
     buf = nil
@@ -547,9 +510,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
       </buffer>
     CONFIG
 
-
+    stub_writer(driver) do |writer|
+      mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
+        sudo_schema_response["schema"]["fields"]
+      end
     end
 
     buf = nil
@@ -689,72 +653,21 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert_equal expected, MultiJson.load(buf)
   end
 
-  def test_convert_hash_to_json
-    now = Fluent::EventTime.now
-    input = {
-      "vhost" => :bar,
-      "referer" => "http://referer.example",
-      "bot_access" => true,
-      "loginsession" => false,
-      "remote" => {
-        "host" => "remote.example",
-        "ip" => "192.0.2.1",
-        "port" => 12345,
-        "user" => "tagomoris",
-      }
-    }
-    expected = {
-      "time" => now.to_i,
-      "vhost" => "bar",
-      "referer" => "http://referer.example",
-      "bot_access" => true,
-      "loginsession" => false,
-      "remote" => "{\"host\":\"remote.example\",\"ip\":\"192.0.2.1\",\"port\":12345,\"user\":\"tagomoris\"}"
-    }
-
-    driver = create_driver(<<-CONFIG)
-      table foo
-      email foo@bar.example
-      private_key_path /path/to/key
-      project yourproject_id
-      dataset yourdataset_id
-
-      convert_hash_to_json true
-
-      <inject>
-      time_format %s
-      time_key  time
-      </inject>
-
-      schema [
-        {"name": "time", "type": "INTEGER"},
-        {"name": "vhost", "type": "STRING"},
-        {"name": "refere", "type": "STRING"},
-        {"name": "bot_access", "type": "BOOLEAN"},
-        {"name": "loginsession", "type": "BOOLEAN"}
-      ]
-    CONFIG
-
-    buf = nil
-    driver.run { buf = driver.instance.format("my.tag", now, input) }
-
-    assert_equal expected, MultiJson.load(buf)
-  end
-
   def test_write
     entry = {a: "b"}
     driver = create_driver
 
-
+    stub_writer(driver) do |writer|
+      mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: hash_including(entry)}], template_suffix: nil)
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+        rows: [{json: hash_including(entry)}],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {}) do
+        s = stub!
+        s.insert_errors { nil }
+        s
+      end
     end
 
     driver.run do
@@ -810,14 +723,15 @@ class BigQueryOutputTest < Test::Unit::TestCase
     CONFIG
 
     entry = {a: "b"}
-
+    stub_writer(driver) do |writer|
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+        rows: [{json: hash_including(entry)}],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {}) do
+        ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
+        raise ex
+      end
     end
 
     assert_raise(Fluent::BigQuery::RetryableError) do
@@ -868,17 +782,18 @@ class BigQueryOutputTest < Test::Unit::TestCase
     CONFIG
 
     entry = {a: "b"}
-
+    stub_writer(driver) do |writer|
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+        rows: [{json: hash_including(entry)}],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {}) do
+        ex = Google::Apis::ServerError.new("error", status_code: 501)
+        def ex.reason
+          "invalid"
+        end
+        raise ex
       end
-      raise ex
     end
 
     driver.instance_start
@@ -915,33 +830,36 @@ class BigQueryOutputTest < Test::Unit::TestCase
     CONFIG
     schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(schema_path)))
 
-    writer = stub_writer(driver)
     io = StringIO.new("hello")
     mock(driver.instance).create_upload_source(is_a(Fluent::Plugin::Buffer::Chunk)).yields(io)
+    stub_writer(driver) do |writer|
+      mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
+      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+      mock(writer.client).insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            schema: {
+              fields: schema_fields,
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON",
+            ignore_unknown_values: false,
+            max_bad_records: 0,
+          }
         }
-      }
+      }, {upload_source: io, content_type: "application/octet-stream"}) do
+        s = stub!
+        job_reference_stub = stub!
+        s.job_reference { job_reference_stub }
+        job_reference_stub.job_id { "dummy_job_id" }
+        s
+      end
     end
 
     driver.run do
@@ -973,32 +891,35 @@ class BigQueryOutputTest < Test::Unit::TestCase
 
     io = StringIO.new("hello")
     mock(driver.instance).create_upload_source(is_a(Fluent::Plugin::Buffer::Chunk)).yields(io)
+    stub_writer(driver) do |writer|
+      mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
+      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+      mock(writer.client).insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            schema: {
+              fields: schema_fields,
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON",
+            ignore_unknown_values: false,
+            max_bad_records: 0,
          },
-          schema: {
-            fields: schema_fields,
-          },
-          write_disposition: "WRITE_APPEND",
-          source_format: "NEWLINE_DELIMITED_JSON",
-          ignore_unknown_values: false,
-          max_bad_records: 0,
         },
+        job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/}} ,
+      }, {upload_source: io, content_type: "application/octet-stream"}) do
+        s = stub!
+        job_reference_stub = stub!
+        s.job_reference { job_reference_stub }
+        job_reference_stub.job_id { "dummy_job_id" }
+        s
+      end
     end
 
     driver.run do
@@ -1013,7 +934,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       table foo
       email foo@bar.example
       private_key_path /path/to/key
-      project
+      project yourproject-id
       dataset yourdataset_id
 
       <inject>
@@ -1036,44 +957,48 @@ class BigQueryOutputTest < Test::Unit::TestCase
 
     io = StringIO.new("hello")
     mock(driver.instance).create_upload_source(chunk).yields(io)
+
+    stub_writer(driver) do |writer|
+      mock(writer.client).get_table('yourproject-id', 'yourdataset_id', 'foo') { nil }
+
+      mock(writer.client).insert_job('yourproject-id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject-id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            schema: {
+              fields: schema_fields,
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON",
+            ignore_unknown_values: false,
+            max_bad_records: 0,
+          }
         }
-      }
-      end
+      }, {upload_source: io, content_type: "application/octet-stream"}) do
+        s = stub!
+        job_reference_stub = stub!
+        s.job_reference { job_reference_stub }
+        job_reference_stub.job_id { "dummy_job_id" }
+        s
+      end
 
-
+      mock(writer.client).get_job('yourproject-id', 'dummy_job_id') do
+        s = stub!
+        status_stub = stub!
+        error_result = stub!
+
+        s.status { status_stub }
+        status_stub.state { "DONE" }
+        status_stub.error_result { error_result }
+        status_stub.errors { nil }
+        error_result.message { "error" }
+        error_result.reason { "backendError" }
+        s
+      end
    end
 
    assert_raise Fluent::BigQuery::RetryableError do
@@ -1117,44 +1042,47 @@ class BigQueryOutputTest < Test::Unit::TestCase
 
     io = StringIO.new("hello")
     mock(driver.instance).create_upload_source(chunk).yields(io)
+    stub_writer(driver) do |writer|
+      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+      mock(writer.client).insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            schema: {
+              fields: schema_fields,
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON",
+            ignore_unknown_values: false,
+            max_bad_records: 0,
+          }
         }
-      }
-      end
+      }, {upload_source: io, content_type: "application/octet-stream"}) do
+        s = stub!
+        job_reference_stub = stub!
+        s.job_reference { job_reference_stub }
+        job_reference_stub.job_id { "dummy_job_id" }
+        s
+      end
 
-
+      mock(writer.client).get_job('yourproject_id', 'dummy_job_id') do
+        s = stub!
+        status_stub = stub!
+        error_result = stub!
+
+        s.status { status_stub }
+        status_stub.state { "DONE" }
+        status_stub.error_result { error_result }
+        status_stub.errors { nil }
+        error_result.message { "error" }
+        error_result.reason { "invalid" }
+        s
+      end
    end
 
    assert_raise Fluent::BigQuery::UnRetryableError do
@@ -1182,12 +1110,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
       ]
     CONFIG
 
-
+    stub_writer(driver) do |writer|
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
+        rows: [entry[0]],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {}) { stub!.insert_errors { nil } }
+    end
 
     driver.run do
       driver.feed("tag", Time.now.to_i, {"a" => "b", "created_at" => Time.local(2014,8,20,9,0,0).strftime("%Y_%m_%d")})
@@ -1235,11 +1164,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
       auto_create_table true
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
     CONFIG
-
-
+
+    stub_writer(driver) do |writer|
+      mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}], template_suffix: nil) do
+        raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
+      end
+      mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
     end
-    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
 
     assert_raise(RuntimeError) do
       driver.run do
@@ -1292,11 +1223,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_partitioning_type day
      time_partitioning_expiration 1h
    CONFIG
-
-
+
+    stub_writer(driver) do |writer|
+      mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) do
+        raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
+      end
+      mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
     end
-    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
 
    assert_raise(RuntimeError) do
      driver.run do
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.1.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-
+date: 2017-10-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -73,20 +73,14 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 0.
-  - - "<"
-    - !ruby/object:Gem::Version
-      version: '0.14'
+      version: 0.11.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
-      version: 0.
-  - - "<"
-    - !ruby/object:Gem::Version
-      version: '0.14'
+      version: 0.11.0
 - !ruby/object:Gem::Dependency
   name: googleauth
   requirement: !ruby/object:Gem::Requirement
@@ -138,6 +132,7 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- ".github/ISSUE_TEMPLATE.md"
 - ".gitignore"
 - ".travis.yml"
 - Gemfile
@@ -179,7 +174,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.
+rubygems_version: 2.6.12
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to store data on Google BigQuery