fluent-plugin-bigquery 0.3.4 → 0.4.0
- checksums.yaml +4 -4
- data/README.md +102 -65
- data/lib/fluent/plugin/bigquery/schema.rb +52 -1
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +21 -5
- data/lib/fluent/plugin/out_bigquery.rb +15 -11
- data/test/plugin/test_out_bigquery.rb +120 -238
- data/test/plugin/test_record_schema.rb +17 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: 7d4074dc903c423acbebd56b2b4d6fc0ce110510
+  data.tar.gz: 4d17cd1b2ee3768b83845105b5b9a714835e0a4c
 SHA512:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: 7f99c64e394650b7eac03e6872dcfafb36981f48a726d8aba9d87fc83b45329ebac925c7d1239113995597e4697d8afcf1f9397c8583d0a3bbe11d47aedd668b
+  data.tar.gz: 52554bcd622e75486fc8a10ceeebd8af958ac5523869f2ae964324c1348b734fcef00c4232766484a0d3112c15b50eb06f334b6efaf2cd55394321139bc1df9e
data/README.md
CHANGED
@@ -21,47 +21,48 @@ If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
 
 ### Options
 
-| name | type | required?
-| :------------------------------------- | :------------ | :-----------
-| method | string | no
-| buffer_type | string | no
-| buffer_chunk_limit | integer | no
-| buffer_queue_limit | integer | no
-| buffer_chunk_records_limit | integer | no
-| flush_interval | float | no
-| try_flush_interval | float | no
-| auth_method | enum | yes
-| email | string | yes (private_key)
-| private_key_path | string | yes (private_key)
-| private_key_passphrase | string | yes (private_key)
-| json_key | string | yes (json_key)
-| project | string | yes
-| table | string | yes (either `tables`)
-| tables | string | yes (either `table`)
-| template_suffix | string | no
-| auto_create_table | bool | no
-| skip_invalid_rows | bool | no
-| max_bad_records | integer | no
-| ignore_unknown_values | bool | no
-…
+| name | type | required? | default | description |
+| :------------------------------------- | :------------ | :----------- | :------------------------- | :----------------------- |
+| method | string | no | insert | `insert` (Streaming Insert) or `load` (load job) |
+| buffer_type | string | no | lightening (insert) or file (load) | |
+| buffer_chunk_limit | integer | no | 1MB (insert) or 1GB (load) | |
+| buffer_queue_limit | integer | no | 1024 (insert) or 32 (load) | |
+| buffer_chunk_records_limit | integer | no | 500 | |
+| flush_interval | float | no | 0.25 (*insert) or default of time sliced output (load) | |
+| try_flush_interval | float | no | 0.05 (*insert) or default of time sliced output (load) | |
+| auth_method | enum | yes | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+| email | string | yes (private_key) | nil | GCP Service Account Email |
+| private_key_path | string | yes (private_key) | nil | GCP Private Key file path |
+| private_key_passphrase | string | yes (private_key) | nil | GCP Private Key Passphrase |
+| json_key | string | yes (json_key) | nil | GCP JSON Key file path or JSON Key string |
+| project | string | yes | nil | |
+| table | string | yes (either `tables`) | nil | |
+| tables | string | yes (either `table`) | nil | can set multi table names splitted by `,` |
+| template_suffix | string | no | nil | can use `%{time_slice}` placeholder replaced by `time_slice_format` |
+| auto_create_table | bool | no | false | If true, creates table automatically |
+| skip_invalid_rows | bool | no | false | Only `insert` method. |
+| max_bad_records | integer | no | 0 | Only `load` method. If the number of bad records exceeds this value, an invalid error is returned in the job result. |
+| ignore_unknown_values | bool | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+| schema | array | yes (either `fetch_schema` or `schema_path`) | nil | Schema Definition. It is formatted by JSON. |
+| schema_path | string | yes (either `fetch_schema`) | nil | Schema Definition file path. It is formatted by JSON. |
+| fetch_schema | bool | yes (either `schema_path`) | false | If true, fetch table schema definition from Bigquery table automatically. |
+| fetch_schema_table | string | no | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
+| schema_cache_expire | integer | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
+| field_string (deprecated) | string | no | nil | see examples. |
+| field_integer (deprecated) | string | no | nil | see examples. |
+| field_float (deprecated) | string | no | nil | see examples. |
+| field_boolean (deprecated) | string | no | nil | see examples. |
+| field_timestamp (deprecated) | string | no | nil | see examples. |
+| time_field | string | no | nil | If this param is set, plugin set formatted time string to this field. |
+| time_format | string | no | nil | ex. `%s`, `%Y/%m%d %H:%M:%S` |
+| replace_record_key | bool | no | false | see examples. |
+| replace_record_key_regexp{1-10} | string | no | nil | see examples. |
+| convert_hash_to_json (deprecated) | bool | no | false | If true, converts Hash value of record to JSON String. |
+| insert_id_field | string | no | nil | Use key as `insert_id` of Streaming Insert API parameter. |
+| request_timeout_sec | integer | no | nil | Bigquery API response timeout |
+| request_open_timeout_sec | integer | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
+| time_partitioning_type | enum | no (either day) | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
+| time_partitioning_expiration | time | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
 
 ### Standard Options
 
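For orientation, the options in the new table combine in the usual fluentd way. A minimal sketch, not taken from this changeset and using placeholder project, dataset, and table names, that exercises a few of the documented parameters might look like:

    <match dummy>
      type bigquery
      method insert                  # or "load" to use load jobs
      auth_method json_key
      json_key /path/to/keyfile.json
      project yourproject_id
      dataset yourdataset_id
      table   tablename
      auto_create_table true
      ignore_unknown_values true
      schema [{"name": "message", "type": "STRING"}]
    </match>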
@@ -96,10 +97,25 @@ Configure insert specifications with target table schema, with your credentials.
   time_format %s
   time_field time
 
-…
+  schema [
+    {"name": "time", "type": "INTEGER"},
+    {"name": "status", "type": "INTEGER"},
+    {"name": "bytes", "type": "INTEGER"},
+    {"name": "vhost", "type": "STRING"},
+    {"name": "path", "type": "STRING"},
+    {"name": "method", "type": "STRING"},
+    {"name": "protocol", "type": "STRING"},
+    {"name": "agent", "type": "STRING"},
+    {"name": "referer", "type": "STRING"},
+    {"name": "remote", "type": "RECORD", "fields": [
+      {"name": "host", "type": "STRING"},
+      {"name": "ip", "type": "STRING"},
+      {"name": "user", "type": "STRING"}
+    ]},
+    {"name": "requesttime", "type": "FLOAT"},
+    {"name": "bot_access", "type": "BOOLEAN"},
+    {"name": "loginsession", "type": "BOOLEAN"}
+  ]
 </match>
 ```
 
@@ -130,10 +146,25 @@ For high rate inserts over streaming inserts, you should specify flush intervals
   time_format %s
   time_field time
 
-…
+  schema [
+    {"name": "time", "type": "INTEGER"},
+    {"name": "status", "type": "INTEGER"},
+    {"name": "bytes", "type": "INTEGER"},
+    {"name": "vhost", "type": "STRING"},
+    {"name": "path", "type": "STRING"},
+    {"name": "method", "type": "STRING"},
+    {"name": "protocol", "type": "STRING"},
+    {"name": "agent", "type": "STRING"},
+    {"name": "referer", "type": "STRING"},
+    {"name": "remote", "type": "RECORD", "fields": [
+      {"name": "host", "type": "STRING"},
+      {"name": "ip", "type": "STRING"},
+      {"name": "user", "type": "STRING"}
+    ]},
+    {"name": "requesttime", "type": "FLOAT"},
+    {"name": "bot_access", "type": "BOOLEAN"},
+    {"name": "loginsession", "type": "BOOLEAN"}
+  ]
 </match>
 ```
 
@@ -266,11 +297,7 @@ Compute Engine instance, then you can configure fluentd like this.
 
   time_format %s
   time_field time
-
-  field_integer time,status,bytes
-  field_string rhost,vhost,path,method,protocol,agent,referer
-  field_float requesttime
-  field_boolean bot_access,loginsession
+  ...
 </match>
 ```
 
@@ -419,10 +446,25 @@ you can also specify nested fields by prefixing their belonging record fields.
   time_format %s
   time_field time
 
-…
+  schema [
+    {"name": "time", "type": "INTEGER"},
+    {"name": "status", "type": "INTEGER"},
+    {"name": "bytes", "type": "INTEGER"},
+    {"name": "vhost", "type": "STRING"},
+    {"name": "path", "type": "STRING"},
+    {"name": "method", "type": "STRING"},
+    {"name": "protocol", "type": "STRING"},
+    {"name": "agent", "type": "STRING"},
+    {"name": "referer", "type": "STRING"},
+    {"name": "remote", "type": "RECORD", "fields": [
+      {"name": "host", "type": "STRING"},
+      {"name": "ip", "type": "STRING"},
+      {"name": "user", "type": "STRING"}
+    ]},
+    {"name": "requesttime", "type": "FLOAT"},
+    {"name": "bot_access", "type": "BOOLEAN"},
+    {"name": "loginsession", "type": "BOOLEAN"}
+  ]
 </match>
 ```
 
@@ -459,10 +501,9 @@ The second method is to specify a path to a BigQuery schema file instead of list
   time_field time
 
   schema_path /path/to/httpd.schema
-  field_integer time
 </match>
 ```
-where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery.
+where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full flexbility.
 
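For illustration (not part of this changeset), such a schema file is a plain JSON array in BigQuery's schema format, and each field may carry a `mode` of NULLABLE, REQUIRED, or REPEATED. A hypothetical /path/to/httpd.schema could look like:

    [
      {"name": "time",   "type": "INTEGER", "mode": "REQUIRED"},
      {"name": "vhost",  "type": "STRING",  "mode": "NULLABLE"},
      {"name": "remote", "type": "RECORD",  "mode": "REPEATED", "fields": [
        {"name": "host", "type": "STRING"},
        {"name": "ip",   "type": "STRING"}
      ]}
    ]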
 
 The third method is to set `fetch_schema` to `true` to enable fetch a schema using BigQuery API. In this case, your fluent.conf looks like:
 
@@ -477,7 +518,6 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
 
   fetch_schema true
   # fetch_schema_table other_table # if you want to fetch schema from other table
-  field_integer time
 </match>
 ```
 
@@ -498,17 +538,14 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p
   ...
 
   insert_id_field uuid
-…
+  schema [{"name": "uuid", "type": "STRING"}]
 </match>
 ```
 
 ## TODO
 
-* support optional data fields
-* support NULLABLE/REQUIRED/REPEATED field options in field list style of configuration
 * OAuth installed application credentials support
 * Google API discovery expiration
-* Error classes
 * check row size limits
 
 ## Authors
data/lib/fluent/plugin/bigquery/schema.rb
CHANGED
@@ -1,3 +1,5 @@
+require 'multi_json'
+
 module Fluent
   module BigQuery
     class FieldSchema
@@ -56,7 +58,11 @@ module Fluent
       end
 
       def format_one(value)
-        value.…
+        if value.is_a?(Hash) || value.is_a?(Array)
+          MultiJson.dump(value)
+        else
+          value.to_s
+        end
       end
     end
 
@@ -116,6 +122,48 @@ module Fluent
       end
     end
 
+    class DateFieldSchema < FieldSchema
+      def type
+        :date
+      end
+
+      def format_one(value)
+        if value.respond_to?(:strftime)
+          value.strftime("%Y-%m-%d")
+        else
+          value
+        end
+      end
+    end
+
+    class DateTimeFieldSchema < FieldSchema
+      def type
+        :datetime
+      end
+
+      def format_one(value)
+        if value.respond_to?(:strftime)
+          value.strftime("%Y-%m-%dT%H:%M:%S.%6L")
+        else
+          value
+        end
+      end
+    end
+
+    class TimeFieldSchema < FieldSchema
+      def type
+        :time
+      end
+
+      def format_one(value)
+        if value.respond_to?(:strftime)
+          value.strftime("%H:%M:%S.%6L")
+        else
+          value
+        end
+      end
+    end
+
     class RecordSchema < FieldSchema
       FIELD_TYPES = {
         string: StringFieldSchema,
@@ -123,6 +171,9 @@ module Fluent
         float: FloatFieldSchema,
         boolean: BooleanFieldSchema,
         timestamp: TimestampFieldSchema,
+        date: DateFieldSchema,
+        datetime: DateTimeFieldSchema,
+        time: TimeFieldSchema,
         record: RecordSchema
       }.freeze
 
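The three classes added above mirror BigQuery's DATE, DATETIME, and TIME column types: values that respond to `strftime` are rendered in the matching textual form, and anything else is passed through unchanged. Assuming the same JSON schema style the README uses for the existing types, declaring such columns would look roughly like this (the column names here are made up for the example):

    schema [
      {"name": "event_date", "type": "DATE"},
      {"name": "event_datetime", "type": "DATETIME"},
      {"name": "event_time", "type": "TIME"}
    ]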
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED
@@ -6,6 +6,7 @@ module Fluent
         @scope = "https://www.googleapis.com/auth/bigquery"
         @auth_options = auth_options
         @log = log
+        @num_errors_per_chunk = {}
 
         @cached_client_expiration = Time.now + 1800
       end
@@ -104,7 +105,7 @@ module Fluent
         raise Fluent::BigQuery::Error.wrap(e)
       end
 
-      def create_load_job(project, dataset, table_id, upload_source, …
+      def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields, prevent_duplicate_load: false, ignore_unknown_values: false, max_bad_records: 0, timeout_sec: nil, open_timeout_sec: 60, auto_create_table: nil, time_partitioning_type: nil, time_partitioning_expiration: nil)
         configuration = {
           configuration: {
             load: {
@@ -123,6 +124,8 @@ module Fluent
             }
           }
         }
+
+        job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a, max_bad_records, ignore_unknown_values) if prevent_duplicate_load
         configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if time_partitioning_type
         configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
 
@@ -148,7 +151,8 @@ module Fluent
            }
          }
        )
-        wait_load_job(project, dataset, res.job_reference.job_id, table_id)
+        wait_load_job(chunk_id, project, dataset, res.job_reference.job_id, table_id)
+        @num_errors_per_chunk.delete(chunk_id)
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        @client = nil
 
@@ -161,12 +165,16 @@ module Fluent
           raise "table created. send rows next time."
         end
 
-        …
+        if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
+          wait_load_job(chunk_id, project, dataset, job_id, table_id)
+          @num_errors_per_chunk.delete(chunk_id)
+          return
+        end
 
         raise Fluent::BigQuery::Error.wrap(e)
       end
 
-      def wait_load_job(project, dataset, job_id, table_id…
+      def wait_load_job(chunk_id, project, dataset, job_id, table_id)
        wait_interval = 10
        _response = client.get_job(project, job_id)
 
@@ -186,9 +194,11 @@ module Fluent
         error_result = _response.status.error_result
         if error_result
           log.error "job.insert API (result)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: error_result.message, reason: error_result.reason
-          if …
+          if Fluent::BigQuery::Error.retryable_error_reason?(error_result.reason)
+            @num_errors_per_chunk[chunk_id] = @num_errors_per_chunk[chunk_id].to_i + 1
             raise Fluent::BigQuery::RetryableError.new("failed to load into bigquery, retry")
           else
+            @num_errors_per_chunk.delete(chunk_id)
             raise Fluent::BigQuery::UnRetryableError.new("failed to load into bigquery, and cannot retry")
           end
         end
@@ -259,6 +269,12 @@ module Fluent
       def safe_table_id(table_id)
         table_id.gsub(/\$\d+$/, "")
       end
+
+      def create_job_id(chunk_id, dataset, table, schema, max_bad_records, ignore_unknown_values)
+        job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}#{@num_errors_per_chunk[chunk_id]}"
+        @log.debug "job_id_key: #{job_id_key}"
+        "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
+      end
     end
   end
 end
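The new `create_job_id`/`@num_errors_per_chunk` pair is what makes `prevent_duplicate_load` safe to retry: the same chunk resent without an intervening failure hashes to the same job id, so BigQuery answers 409 and the writer simply waits on the job that already exists, while a retryable load error bumps the per-chunk counter and therefore produces a fresh id on the next attempt. A simplified sketch of that property (illustrative only; the real key also folds in the schema and the load options):

    require 'digest/sha1'

    # Illustrative only: derive a deterministic load-job id from the chunk and an error counter.
    def illustrative_job_id(chunk_id, dataset, table, num_errors)
      "fluentd_job_" + Digest::SHA1.hexdigest("#{chunk_id}#{dataset}#{table}#{num_errors}")
    end

    first       = illustrative_job_id("abc", "yourdataset_id", "foo", nil) # initial load attempt
    resent      = illustrative_job_id("abc", "yourdataset_id", "foo", nil) # same chunk flushed again => same id, deduplicated
    after_error = illustrative_job_id("abc", "yourdataset_id", "foo", 1)   # after one retryable error => new id, load re-runs
    # first == resent, first != after_error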
data/lib/fluent/plugin/out_bigquery.rb
CHANGED
@@ -87,6 +87,7 @@ module Fluent
     # Default is false, which treats unknown values as errors.
     config_param :ignore_unknown_values, :bool, default: false
 
+    config_param :schema, :array, default: nil
     config_param :schema_path, :string, default: nil
     config_param :fetch_schema, :bool, default: false
     config_param :fetch_schema_table, :string, default: nil
@@ -213,7 +214,11 @@ module Fluent
 
       @tablelist = @tables ? @tables.split(',') : [@table]
 
+      legacy_schema_config_deprecation
       @fields = Fluent::BigQuery::RecordSchema.new('record')
+      if @schema
+        @fields.load_schema(@schema)
+      end
       if @schema_path
         @fields.load_schema(MultiJson.load(File.read(@schema_path)))
       end
@@ -259,6 +264,8 @@ module Fluent
       else
         @get_insert_id = nil
       end
+
+      warn "[DEPRECATION] `convert_hash_to_json` param is deprecated. If Hash value is inserted string field, plugin convert it to json automatically." if @convert_hash_to_json
     end
 
     def start
@@ -329,6 +336,12 @@ module Fluent
       record
     end
 
+    def legacy_schema_config_deprecation
+      if [@field_string, @field_integer, @field_float, @field_boolean, @field_timestamp].any?
+        warn "[DEPRECATION] `field_*` style schema config is deprecated. Instead of it, use `schema` config params that is array of json style."
+      end
+    end
+
     def write(chunk)
       table_id_format = @tables_mutex.synchronize do
         t = @tables_queue.shift
@@ -455,14 +468,9 @@ module Fluent
     def load(chunk, table_id)
       res = nil
 
-      if @prevent_duplicate_load
-        job_id = create_job_id(chunk, @dataset, table_id, @fields.to_a, @max_bad_records, @ignore_unknown_values)
-      else
-        job_id = nil
-      end
-
       create_upload_source(chunk) do |upload_source|
-        res = writer.create_load_job(@project, @dataset, table_id, upload_source, …
+        res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields, {
+          prevent_duplicate_load: @prevent_duplicate_load,
           ignore_unknown_values: @ignore_unknown_values, max_bad_records: @max_bad_records,
           timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec, auto_create_table: @auto_create_table,
           time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration
@@ -494,10 +502,6 @@ module Fluent
         end
       end
     end
-
-    def create_job_id(chunk, dataset, table, schema, max_bad_records, ignore_unknown_values)
-      "fluentd_job_" + Digest::SHA1.hexdigest("#{chunk.unique_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}")
-    end
     end
   end
 end
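The deprecation warnings above point at a mechanical migration: every `field_<type>` list becomes an entry in the `schema` array. A sketch of the before/after, using column names taken from the README examples (this block is not itself part of the changeset):

    # deprecated field_* style
    field_integer time,status,bytes
    field_float   requesttime
    field_boolean bot_access

    # 0.4.0 schema style
    schema [
      {"name": "time", "type": "INTEGER"},
      {"name": "status", "type": "INTEGER"},
      {"name": "bytes", "type": "INTEGER"},
      {"name": "requesttime", "type": "FLOAT"},
      {"name": "bot_access", "type": "BOOLEAN"}
    ]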
data/test/plugin/test_out_bigquery.rb
CHANGED
@@ -22,16 +22,31 @@ class BigQueryOutputTest < Test::Unit::TestCase
     time_format %s
     time_field time
 
-…
+    schema [
+      {"name": "time", "type": "INTEGER"},
+      {"name": "status", "type": "INTEGER"},
+      {"name": "bytes", "type": "INTEGER"},
+      {"name": "vhost", "type": "STRING"},
+      {"name": "path", "type": "STRING"},
+      {"name": "method", "type": "STRING"},
+      {"name": "protocol", "type": "STRING"},
+      {"name": "agent", "type": "STRING"},
+      {"name": "referer", "type": "STRING"},
+      {"name": "remote", "type": "RECORD", "fields": [
+        {"name": "host", "type": "STRING"},
+        {"name": "ip", "type": "STRING"},
+        {"name": "user", "type": "STRING"}
+      ]},
+      {"name": "requesttime", "type": "FLOAT"},
+      {"name": "bot_access", "type": "BOOLEAN"},
+      {"name": "loginsession", "type": "BOOLEAN"}
+    ]
   ]
 
   API_SCOPE = "https://www.googleapis.com/auth/bigquery"
 
   def create_driver(conf = CONFIG)
-    Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf)
+    Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf, true)
   end
 
   def stub_writer(driver)
@@ -91,7 +106,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
       auth_method compute_engine
       project yourproject_id
       dataset yourdataset_id
-…
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
     driver.instance.writer
@@ -114,7 +133,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
       json_key #{json_key_path}
       project yourproject_id
       dataset yourdataset_id
-…
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
     driver.instance.writer
@@ -134,7 +157,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
      json_key #{json_key_path}
      project yourproject_id
      dataset yourdataset_id
-…
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     assert_raises(Errno::EACCES) do
       driver.instance.writer.client
@@ -147,9 +174,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
   def test_configure_auth_json_key_as_string
     json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
     json_key_io = StringIO.new(json_key)
-    mock(StringIO).new(json_key) { json_key_io }
     authorization = Object.new
-    mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: json_key_io, scope: API_SCOPE) { authorization }
+    mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == JSON.parse(json_key_io.read) }, scope: API_SCOPE) { authorization }
 
     mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
       mock(cl).__send__(:authorization=, authorization) {}
@@ -162,7 +188,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
      json_key #{json_key}
      project yourproject_id
      dataset yourdataset_id
-…
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
     driver.instance.writer
@@ -183,7 +213,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
      auth_method application_default
      project yourproject_id
      dataset yourdataset_id
-…
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
 
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
@@ -191,186 +225,6 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
-  def test_configure_fieldname_stripped
-    driver = create_driver(%[
-      table foo
-      email foo@bar.example
-      private_key_path /path/to/key
-      project yourproject_id
-      dataset yourdataset_id
-
-      time_format %s
-      time_field time
-
-      field_integer time , status , bytes
-      field_string _log_name, vhost, path, method, protocol, agent, referer, remote.host, remote.ip, remote.user
-      field_float requesttime
-      field_boolean bot_access , loginsession
-    ])
-    fields = driver.instance.instance_eval{ @fields }
-
-    assert (not fields['time ']), "tailing spaces must be stripped"
-    assert fields['time']
-    assert fields['status']
-    assert fields['bytes']
-    assert fields['_log_name']
-    assert fields['vhost']
-    assert fields['protocol']
-    assert fields['agent']
-    assert fields['referer']
-    assert fields['remote']['host']
-    assert fields['remote']['ip']
-    assert fields['remote']['user']
-    assert fields['requesttime']
-    assert fields['bot_access']
-    assert fields['loginsession']
-  end
-
-  def test_configure_invalid_fieldname
-    base = %[
-      table foo
-      email foo@bar.example
-      private_key_path /path/to/key
-      project yourproject_id
-      dataset yourdataset_id
-
-      time_format %s
-      time_field time
-    ]
-
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_integer time field\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string my name\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string remote.host name\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string 1column\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string #{'tenstrings' * 12 + '123456789'}\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_float request time\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_boolean login session\n")
-    end
-  end
-
-  def test_format_stream
-    now = Time.now
-    input = [
-      now,
-      {
-        "status" => "1",
-        "bytes" => 3.0,
-        "vhost" => :bar,
-        "path" => "/path/to/baz",
-        "method" => "GET",
-        "protocol" => "HTTP/0.9",
-        "agent" => "libwww",
-        "referer" => "http://referer.example",
-        "requesttime" => (now - 1).to_f.to_s,
-        "bot_access" => true,
-        "loginsession" => false,
-        "something-else" => "would be ignored",
-        "yet-another" => {
-          "foo" => "bar",
-          "baz" => 1,
-        },
-        "remote" => {
-          "host" => "remote.example",
-          "ip" => "192.0.2.1",
-          "port" => 12345,
-          "user" => "tagomoris",
-        }
-      }
-    ]
-    expected = {
-      "json" => {
-        "time" => now.to_i,
-        "status" => 1,
-        "bytes" => 3,
-        "vhost" => "bar",
-        "path" => "/path/to/baz",
-        "method" => "GET",
-        "protocol" => "HTTP/0.9",
-        "agent" => "libwww",
-        "referer" => "http://referer.example",
-        "requesttime" => (now - 1).to_f.to_s.to_f,
-        "bot_access" => true,
-        "loginsession" => false,
-        "something-else" => "would be ignored",
-        "yet-another" => {
-          "foo" => "bar",
-          "baz" => 1,
-        },
-        "remote" => {
-          "host" => "remote.example",
-          "ip" => "192.0.2.1",
-          "port" => 12345,
-          "user" => "tagomoris",
-        }
-      }
-    }
-
-    driver = create_driver(CONFIG)
-    driver.instance.start
-    buf = driver.instance.format_stream("my.tag", [input])
-    driver.instance.shutdown
-
-    assert_equal expected, MessagePack.unpack(buf)
-  end
-
-  [
-    # <time_format>, <time field type>, <time expectation generator>, <assertion>
-    [
-      "%s.%6N", "field_float",
-      lambda{|t| t.strftime("%s.%6N").to_f },
-      lambda{|recv, expected, actual|
-        recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
-      }
-    ],
-    [
-      "%Y-%m-%dT%H:%M:%SZ", "field_string",
-      lambda{|t| t.iso8601 },
-      :assert_equal.to_proc
-    ],
-    [
-      "%a, %d %b %Y %H:%M:%S GMT", "field_string",
-      lambda{|t| t.httpdate },
-      :assert_equal.to_proc
-    ],
-  ].each do |format, type, expect_time, assert|
-    define_method("test_time_formats_#{format}") do
-      now = Time.now.utc
-      input = [ now, {} ]
-      expected = { "json" => { "time" => expect_time[now], } }
-
-      driver = create_driver(<<-CONFIG)
-        table foo
-        email foo@bar.example
-        private_key_path /path/to/key
-        project yourproject_id
-        dataset yourdataset_id
-
-        time_format #{format}
-        time_field time
-        #{type} time
-      CONFIG
-
-      driver.instance.start
-      buf = driver.instance.format_stream("my.tag", [input])
-      driver.instance.shutdown
-
-      assert[self, expected["json"]["time"], MessagePack.unpack(buf)["json"]["time"]]
-    end
-  end
-
   def test_format_nested_time
     now = Time.now
     input = [
@@ -402,8 +256,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_format %s
       time_field metadata.time
 
-…
+      schema [
+        {"name": "metadata", "type": "RECORD", "fields": [
+          {"name": "time", "type": "INTEGER"},
+          {"name": "node", "type": "STRING"}
+        ]},
+        {"name": "log", "type": "STRING"}
+      ]
     CONFIG
 
     driver.instance.start
@@ -489,7 +348,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_field time
 
      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
-…
+      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG
    driver.instance.start
    buf = driver.instance.format_stream("my.tag", [input])
@@ -529,7 +388,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_field time
 
      schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
-…
+      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG
    driver.instance.start
    buf = driver.instance.format_stream("my.tag", [input])
@@ -569,7 +428,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_field time
 
      fetch_schema true
-…
+      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG
 
    writer = stub_writer(driver)
@@ -635,7 +494,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_field time
 
      fetch_schema true
-…
+      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG
 
    writer = stub_writer(driver)
@@ -693,7 +552,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
      dataset yourdataset_id
 
      insert_id_field uuid
-…
+      schema [{"name": "uuid", "type": "STRING"}]
    CONFIG
    driver.instance.start
    buf = driver.instance.format_stream("my.tag", [input])
@@ -729,7 +588,9 @@ class BigQueryOutputTest < Test::Unit::TestCase
      dataset yourdataset_id
 
      insert_id_field data.uuid
-…
+      schema [{"name": "data", "type": "RECORD", "fields": [
+        {"name": "uuid", "type": "STRING"}
+      ]}]
    CONFIG
    driver.instance.start
    buf = driver.instance.format_stream("my.tag", [input])
@@ -758,7 +619,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
      project yourproject_id
      dataset yourdataset_id
 
-…
+      schema [{"name": "uuid", "type": "STRING"}]
 
      buffer_type memory
    CONFIG
@@ -803,9 +664,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_format %s
      time_field time
 
-…
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "refere", "type": "STRING"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "login_session", "type": "BOOLEAN"}
+      ]
    CONFIG
    driver.instance.start
    buf = driver.instance.format_stream("my.tag", [input])
@@ -854,9 +719,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_format %s
      time_field time
 
-…
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "refere", "type": "STRING"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "loginsession", "type": "BOOLEAN"}
+      ]
    CONFIG
    driver.instance.start
    buf = driver.instance.format_stream("my.tag", [input])
@@ -906,10 +775,25 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_format %s
      time_field time
 
-…
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "path", "type": "STRING"},
+        {"name": "method", "type": "STRING"},
+        {"name": "protocol", "type": "STRING"},
+        {"name": "agent", "type": "STRING"},
+        {"name": "referer", "type": "STRING"},
+        {"name": "remote", "type": "RECORD", "fields": [
+          {"name": "host", "type": "STRING"},
+          {"name": "ip", "type": "STRING"},
+          {"name": "user", "type": "STRING"}
+        ]},
+        {"name": "requesttime", "type": "FLOAT"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "loginsession", "type": "BOOLEAN"}
+      ]
      <secondary>
        type file
        path error
@@ -951,10 +835,25 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_format %s
      time_field time
 
-…
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "path", "type": "STRING"},
+        {"name": "method", "type": "STRING"},
+        {"name": "protocol", "type": "STRING"},
+        {"name": "agent", "type": "STRING"},
+        {"name": "referer", "type": "STRING"},
+        {"name": "remote", "type": "RECORD", "fields": [
+          {"name": "host", "type": "STRING"},
+          {"name": "ip", "type": "STRING"},
+          {"name": "user", "type": "STRING"}
+        ]},
+        {"name": "requesttime", "type": "FLOAT"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "loginsession", "type": "BOOLEAN"}
+      ]
      <secondary>
        type file
        path error
@@ -1002,20 +901,16 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_field time
 
      schema_path #{schema_path}
-     field_integer time
 
      buffer_type memory
    CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)…
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
    writer = stub_writer(driver)
    chunk = Fluent::MemoryBufferChunk.new("my.tag")
    io = StringIO.new("hello")
    mock(driver.instance).create_upload_source(chunk).yields(io)
-    mock(writer).wait_load_job("yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
+    mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
    mock(writer.client).insert_job('yourproject_id', {
      configuration: {
        load: {
@@ -1065,22 +960,17 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_field time
 
      schema_path #{schema_path}
-     field_integer time
      prevent_duplicate_load true
 
      buffer_type memory
    CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)…
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
    chunk = Fluent::MemoryBufferChunk.new("my.tag")
    io = StringIO.new("hello")
    mock(driver.instance).create_upload_source(chunk).yields(io)
-    mock.proxy(driver.instance).create_job_id(duck_type(:unique_id), "yourdataset_id", "foo", driver.instance.instance_variable_get(:@fields).to_a, 0, false)
    writer = stub_writer(driver)
-    mock(writer).wait_load_job("yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
+    mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
    mock(writer.client).insert_job('yourproject_id', {
      configuration: {
        load: {
@@ -1131,14 +1021,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_field time
 
      schema_path #{schema_path}
-     field_integer time
 
      buffer_type memory
    CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)…
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
    chunk = Fluent::MemoryBufferChunk.new("my.tag")
    io = StringIO.new("hello")
@@ -1209,7 +1095,6 @@ class BigQueryOutputTest < Test::Unit::TestCase
      time_field time
 
      schema_path #{schema_path}
-     field_integer time
 
      buffer_type memory
      <secondary>
@@ -1218,10 +1103,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
        utc
      </secondary>
    CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)…
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
    chunk = Fluent::MemoryBufferChunk.new("my.tag")
    io = StringIO.new("hello")
data/test/plugin/test_record_schema.rb
CHANGED
@@ -154,6 +154,23 @@ class RecordSchemaTest < Test::Unit::TestCase
     )
   end
 
+  def test_format_one_convert_array_or_hash_to_json
+    fields = Fluent::BigQuery::RecordSchema.new("record")
+    fields.load_schema(base_schema, false)
+
+    time = Time.local(2016, 2, 7, 19, 0, 0).utc
+
+    formatted = fields.format_one({
+      "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42]
+    })
+    assert_equal(
+      formatted,
+      {
+        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"]
+      }
+    )
+  end
+
   def test_format_one_with_extra_column
     fields = Fluent::BigQuery::RecordSchema.new("record")
     fields.load_schema(base_schema, false)
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.…
+  version: 0.4.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: …
+date: 2017-01-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake