fluent-plugin-bigquery 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +102 -65
- data/lib/fluent/plugin/bigquery/schema.rb +52 -1
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +21 -5
- data/lib/fluent/plugin/out_bigquery.rb +15 -11
- data/test/plugin/test_out_bigquery.rb +120 -238
- data/test/plugin/test_record_schema.rb +17 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7d4074dc903c423acbebd56b2b4d6fc0ce110510
+  data.tar.gz: 4d17cd1b2ee3768b83845105b5b9a714835e0a4c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7f99c64e394650b7eac03e6872dcfafb36981f48a726d8aba9d87fc83b45329ebac925c7d1239113995597e4697d8afcf1f9397c8583d0a3bbe11d47aedd668b
+  data.tar.gz: 52554bcd622e75486fc8a10ceeebd8af958ac5523869f2ae964324c1348b734fcef00c4232766484a0d3112c15b50eb06f334b6efaf2cd55394321139bc1df9e
data/README.md
CHANGED
@@ -21,47 +21,48 @@ If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
 
 ### Options
 
-| name | type | required?
-| :------------------------------------- | :------------ | :-----------
-| method | string | no
-| buffer_type | string | no
-| buffer_chunk_limit | integer | no
-| buffer_queue_limit | integer | no
-| buffer_chunk_records_limit | integer | no
-| flush_interval | float | no
-| try_flush_interval | float | no
-| auth_method | enum | yes
-| email | string | yes (private_key)
-| private_key_path | string | yes (private_key)
-| private_key_passphrase | string | yes (private_key)
-| json_key | string | yes (json_key)
-| project | string | yes
-| table | string | yes (either `tables`)
-| tables | string | yes (either `table`)
-| template_suffix | string | no
-| auto_create_table | bool | no
-| skip_invalid_rows | bool | no
-| max_bad_records | integer | no
-| ignore_unknown_values | bool | no
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+| name | type | required? | default | description |
+| :------------------------------------- | :------------ | :----------- | :------------------------- | :----------------------- |
+| method | string | no | insert | `insert` (Streaming Insert) or `load` (load job) |
+| buffer_type | string | no | lightening (insert) or file (load) | |
+| buffer_chunk_limit | integer | no | 1MB (insert) or 1GB (load) | |
+| buffer_queue_limit | integer | no | 1024 (insert) or 32 (load) | |
+| buffer_chunk_records_limit | integer | no | 500 | |
+| flush_interval | float | no | 0.25 (*insert) or default of time sliced output (load) | |
+| try_flush_interval | float | no | 0.05 (*insert) or default of time sliced output (load) | |
+| auth_method | enum | yes | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+| email | string | yes (private_key) | nil | GCP Service Account Email |
+| private_key_path | string | yes (private_key) | nil | GCP Private Key file path |
+| private_key_passphrase | string | yes (private_key) | nil | GCP Private Key Passphrase |
+| json_key | string | yes (json_key) | nil | GCP JSON Key file path or JSON Key string |
+| project | string | yes | nil | |
+| table | string | yes (either `tables`) | nil | |
+| tables | string | yes (either `table`) | nil | can set multi table names splitted by `,` |
+| template_suffix | string | no | nil | can use `%{time_slice}` placeholder replaced by `time_slice_format` |
+| auto_create_table | bool | no | false | If true, creates table automatically |
+| skip_invalid_rows | bool | no | false | Only `insert` method. |
+| max_bad_records | integer | no | 0 | Only `load` method. If the number of bad records exceeds this value, an invalid error is returned in the job result. |
+| ignore_unknown_values | bool | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+| schema | array | yes (either `fetch_schema` or `schema_path`) | nil | Schema Definition. It is formatted by JSON. |
+| schema_path | string | yes (either `fetch_schema`) | nil | Schema Definition file path. It is formatted by JSON. |
+| fetch_schema | bool | yes (either `schema_path`) | false | If true, fetch table schema definition from Bigquery table automatically. |
+| fetch_schema_table | string | no | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
+| schema_cache_expire | integer | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
+| field_string (deprecated) | string | no | nil | see examples. |
+| field_integer (deprecated) | string | no | nil | see examples. |
+| field_float (deprecated) | string | no | nil | see examples. |
+| field_boolean (deprecated) | string | no | nil | see examples. |
+| field_timestamp (deprecated) | string | no | nil | see examples. |
+| time_field | string | no | nil | If this param is set, plugin set formatted time string to this field. |
+| time_format | string | no | nil | ex. `%s`, `%Y/%m%d %H:%M:%S` |
+| replace_record_key | bool | no | false | see examples. |
+| replace_record_key_regexp{1-10} | string | no | nil | see examples. |
+| convert_hash_to_json (deprecated) | bool | no | false | If true, converts Hash value of record to JSON String. |
+| insert_id_field | string | no | nil | Use key as `insert_id` of Streaming Insert API parameter. |
+| request_timeout_sec | integer | no | nil | Bigquery API response timeout |
+| request_open_timeout_sec | integer | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
+| time_partitioning_type | enum | no (either day) | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
+| time_partitioning_expiration | time | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
 
 ### Standard Options
 
@@ -96,10 +97,25 @@ Configure insert specifications with target table schema, with your credentials.
   time_format %s
   time_field  time
 
-
-
-
-
+  schema [
+    {"name": "time", "type": "INTEGER"},
+    {"name": "status", "type": "INTEGER"},
+    {"name": "bytes", "type": "INTEGER"},
+    {"name": "vhost", "type": "STRING"},
+    {"name": "path", "type": "STRING"},
+    {"name": "method", "type": "STRING"},
+    {"name": "protocol", "type": "STRING"},
+    {"name": "agent", "type": "STRING"},
+    {"name": "referer", "type": "STRING"},
+    {"name": "remote", "type": "RECORD", "fields": [
+      {"name": "host", "type": "STRING"},
+      {"name": "ip", "type": "STRING"},
+      {"name": "user", "type": "STRING"}
+    ]},
+    {"name": "requesttime", "type": "FLOAT"},
+    {"name": "bot_access", "type": "BOOLEAN"},
+    {"name": "loginsession", "type": "BOOLEAN"}
+  ]
 </match>
 ```
 
@@ -130,10 +146,25 @@ For high rate inserts over streaming inserts, you should specify flush intervals
   time_format %s
   time_field  time
 
-
-
-
-
+  schema [
+    {"name": "time", "type": "INTEGER"},
+    {"name": "status", "type": "INTEGER"},
+    {"name": "bytes", "type": "INTEGER"},
+    {"name": "vhost", "type": "STRING"},
+    {"name": "path", "type": "STRING"},
+    {"name": "method", "type": "STRING"},
+    {"name": "protocol", "type": "STRING"},
+    {"name": "agent", "type": "STRING"},
+    {"name": "referer", "type": "STRING"},
+    {"name": "remote", "type": "RECORD", "fields": [
+      {"name": "host", "type": "STRING"},
+      {"name": "ip", "type": "STRING"},
+      {"name": "user", "type": "STRING"}
+    ]},
+    {"name": "requesttime", "type": "FLOAT"},
+    {"name": "bot_access", "type": "BOOLEAN"},
+    {"name": "loginsession", "type": "BOOLEAN"}
+  ]
 </match>
 ```
 
@@ -266,11 +297,7 @@ Compute Engine instance, then you can configure fluentd like this.
 
   time_format %s
   time_field  time
-
-  field_integer time,status,bytes
-  field_string  rhost,vhost,path,method,protocol,agent,referer
-  field_float   requesttime
-  field_boolean bot_access,loginsession
+  ...
 </match>
 ```
 
@@ -419,10 +446,25 @@ you can also specify nested fields by prefixing their belonging record fields.
   time_format %s
   time_field  time
 
-
-
-
-
+  schema [
+    {"name": "time", "type": "INTEGER"},
+    {"name": "status", "type": "INTEGER"},
+    {"name": "bytes", "type": "INTEGER"},
+    {"name": "vhost", "type": "STRING"},
+    {"name": "path", "type": "STRING"},
+    {"name": "method", "type": "STRING"},
+    {"name": "protocol", "type": "STRING"},
+    {"name": "agent", "type": "STRING"},
+    {"name": "referer", "type": "STRING"},
+    {"name": "remote", "type": "RECORD", "fields": [
+      {"name": "host", "type": "STRING"},
+      {"name": "ip", "type": "STRING"},
+      {"name": "user", "type": "STRING"}
+    ]},
+    {"name": "requesttime", "type": "FLOAT"},
+    {"name": "bot_access", "type": "BOOLEAN"},
+    {"name": "loginsession", "type": "BOOLEAN"}
+  ]
 </match>
 ```
 
@@ -459,10 +501,9 @@ The second method is to specify a path to a BigQuery schema file instead of list
   time_field  time
 
   schema_path /path/to/httpd.schema
-  field_integer time
 </match>
 ```
-where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery.
+where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using external schema file you are able to write full schema that does support NULLABLE/REQUIRED/REPEATED, this feature is really useful and adds full flexbility.
 
 The third method is to set `fetch_schema` to `true` to enable fetch a schema using BigQuery API. In this case, your fluent.conf looks like:
 
@@ -477,7 +518,6 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
 
   fetch_schema true
   # fetch_schema_table other_table # if you want to fetch schema from other table
-  field_integer time
 </match>
 ```
 
@@ -498,17 +538,14 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p
   ...
 
   insert_id_field uuid
-
+  schema [{"name": "uuid", "type": "STRING"}]
 </match>
 ```
 
 ## TODO
 
-* support optional data fields
-* support NULLABLE/REQUIRED/REPEATED field options in field list style of configuration
 * OAuth installed application credentials support
 * Google API discovery expiration
-* Error classes
 * check row size limits
 
 ## Authors
data/lib/fluent/plugin/bigquery/schema.rb
CHANGED
@@ -1,3 +1,5 @@
+require 'multi_json'
+
 module Fluent
   module BigQuery
     class FieldSchema
@@ -56,7 +58,11 @@ module Fluent
       end
 
       def format_one(value)
-        value.
+        if value.is_a?(Hash) || value.is_a?(Array)
+          MultiJson.dump(value)
+        else
+          value.to_s
+        end
       end
     end
 
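With this change a `string` column no longer needs `convert_hash_to_json`: the field schema itself serializes Hash and Array values to JSON. A minimal standalone sketch of that coercion (plain Ruby, not the plugin class; the helper name is made up for illustration):

```ruby
require 'multi_json'

# Sketch of the coercion the new StringFieldSchema#format_one applies:
# Hash/Array values become JSON strings, everything else falls back to to_s.
def format_string_field(value)
  if value.is_a?(Hash) || value.is_a?(Array)
    MultiJson.dump(value)
  else
    value.to_s
  end
end

format_string_field({"user" => "joker1007", "uid" => 10000})  # => '{"user":"joker1007","uid":10000}'
format_string_field(["tty1", "tty2", "tty3"])                 # => '["tty1","tty2","tty3"]'
format_string_field(42)                                        # => "42"
```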
@@ -116,6 +122,48 @@ module Fluent
         end
       end
 
+      class DateFieldSchema < FieldSchema
+        def type
+          :date
+        end
+
+        def format_one(value)
+          if value.respond_to?(:strftime)
+            value.strftime("%Y-%m-%d")
+          else
+            value
+          end
+        end
+      end
+
+      class DateTimeFieldSchema < FieldSchema
+        def type
+          :datetime
+        end
+
+        def format_one(value)
+          if value.respond_to?(:strftime)
+            value.strftime("%Y-%m-%dT%H:%M:%S.%6L")
+          else
+            value
+          end
+        end
+      end
+
+      class TimeFieldSchema < FieldSchema
+        def type
+          :time
+        end
+
+        def format_one(value)
+          if value.respond_to?(:strftime)
+            value.strftime("%H:%M:%S.%6L")
+          else
+            value
+          end
+        end
+      end
+
       class RecordSchema < FieldSchema
         FIELD_TYPES = {
           string: StringFieldSchema,
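The three new field classes add support for BigQuery's DATE, DATETIME and TIME column types: when a record value responds to `strftime` it is rendered with the patterns shown above, otherwise it is passed through unchanged. A small illustrative sketch (sample values are made up; only the format strings come from the diff):

```ruby
# Illustrative only: the strftime patterns used by the new DATE/DATETIME/TIME schemas.
t = Time.utc(2017, 1, 30, 12, 34, 56)

t.strftime("%Y-%m-%d")               # DATE,     e.g. "2017-01-30"
t.strftime("%Y-%m-%dT%H:%M:%S.%6L")  # DATETIME, e.g. "2017-01-30T12:34:56.000000"
t.strftime("%H:%M:%S.%6L")           # TIME,     e.g. "12:34:56.000000"

# A value that is already a String (e.g. "2017-01-30") does not respond to
# strftime and would be forwarded as-is.
```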
@@ -123,6 +171,9 @@ module Fluent
           float: FloatFieldSchema,
           boolean: BooleanFieldSchema,
           timestamp: TimestampFieldSchema,
+          date: DateFieldSchema,
+          datetime: DateTimeFieldSchema,
+          time: TimeFieldSchema,
           record: RecordSchema
         }.freeze
 
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED
@@ -6,6 +6,7 @@ module Fluent
        @scope = "https://www.googleapis.com/auth/bigquery"
        @auth_options = auth_options
        @log = log
+       @num_errors_per_chunk = {}
 
        @cached_client_expiration = Time.now + 1800
      end
@@ -104,7 +105,7 @@ module Fluent
        raise Fluent::BigQuery::Error.wrap(e)
      end
 
-     def create_load_job(project, dataset, table_id, upload_source,
+     def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields, prevent_duplicate_load: false, ignore_unknown_values: false, max_bad_records: 0, timeout_sec: nil, open_timeout_sec: 60, auto_create_table: nil, time_partitioning_type: nil, time_partitioning_expiration: nil)
        configuration = {
          configuration: {
            load: {
@@ -123,6 +124,8 @@ module Fluent
            }
          }
        }
+
+       job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a, max_bad_records, ignore_unknown_values) if prevent_duplicate_load
        configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if time_partitioning_type
        configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
 
@@ -148,7 +151,8 @@ module Fluent
            }
          }
        )
-       wait_load_job(project, dataset, res.job_reference.job_id, table_id)
+       wait_load_job(chunk_id, project, dataset, res.job_reference.job_id, table_id)
+       @num_errors_per_chunk.delete(chunk_id)
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        @client = nil
 
@@ -161,12 +165,16 @@ module Fluent
          raise "table created. send rows next time."
        end
 
-
+       if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
+         wait_load_job(chunk_id, project, dataset, job_id, table_id)
+         @num_errors_per_chunk.delete(chunk_id)
+         return
+       end
 
        raise Fluent::BigQuery::Error.wrap(e)
      end
 
-     def wait_load_job(project, dataset, job_id, table_id)
+     def wait_load_job(chunk_id, project, dataset, job_id, table_id)
        wait_interval = 10
        _response = client.get_job(project, job_id)
 
@@ -186,9 +194,11 @@ module Fluent
        error_result = _response.status.error_result
        if error_result
          log.error "job.insert API (result)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: error_result.message, reason: error_result.reason
-         if
+         if Fluent::BigQuery::Error.retryable_error_reason?(error_result.reason)
+           @num_errors_per_chunk[chunk_id] = @num_errors_per_chunk[chunk_id].to_i + 1
            raise Fluent::BigQuery::RetryableError.new("failed to load into bigquery, retry")
          else
+           @num_errors_per_chunk.delete(chunk_id)
            raise Fluent::BigQuery::UnRetryableError.new("failed to load into bigquery, and cannot retry")
          end
        end
@@ -259,6 +269,12 @@ module Fluent
      def safe_table_id(table_id)
        table_id.gsub(/\$\d+$/, "")
      end
+
+     def create_job_id(chunk_id, dataset, table, schema, max_bad_records, ignore_unknown_values)
+       job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}#{@num_errors_per_chunk[chunk_id]}"
+       @log.debug "job_id_key: #{job_id_key}"
+       "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
+     end
    end
  end
 end
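The job-id generation for `prevent_duplicate_load` moves into the writer and now mixes the per-chunk error count into the SHA1 key, so an identical retry of the same chunk reuses the same BigQuery job id, while a retry after a recorded retryable failure gets a fresh one. A standalone sketch of that key construction (all values below are hypothetical):

```ruby
require 'digest/sha1'

# Hypothetical inputs; in the plugin they come from the buffer chunk and config.
chunk_id   = "\x01\x02fake-chunk-id"
dataset    = "yourdataset_id"
table      = "foo"
schema     = [{name: "time", type: "INTEGER"}]
num_errors = {} # stands in for @num_errors_per_chunk

job_id = lambda do
  key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}0false#{num_errors[chunk_id]}"
  "fluentd_job_" + Digest::SHA1.hexdigest(key)
end

first_attempt = job_id.call
num_errors[chunk_id] = 1          # a retryable load error was recorded
second_attempt = job_id.call

first_attempt == second_attempt   # => false: the retried chunk gets a new job id
```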
data/lib/fluent/plugin/out_bigquery.rb
CHANGED
@@ -87,6 +87,7 @@ module Fluent
     # Default is false, which treats unknown values as errors.
     config_param :ignore_unknown_values, :bool, default: false
 
+    config_param :schema, :array, default: nil
     config_param :schema_path, :string, default: nil
     config_param :fetch_schema, :bool, default: false
     config_param :fetch_schema_table, :string, default: nil
@@ -213,7 +214,11 @@ module Fluent
 
      @tablelist = @tables ? @tables.split(',') : [@table]
 
+     legacy_schema_config_deprecation
      @fields = Fluent::BigQuery::RecordSchema.new('record')
+     if @schema
+       @fields.load_schema(@schema)
+     end
      if @schema_path
        @fields.load_schema(MultiJson.load(File.read(@schema_path)))
      end
@@ -259,6 +264,8 @@ module Fluent
      else
        @get_insert_id = nil
      end
+
+     warn "[DEPRECATION] `convert_hash_to_json` param is deprecated. If Hash value is inserted string field, plugin convert it to json automatically." if @convert_hash_to_json
    end
 
    def start
@@ -329,6 +336,12 @@ module Fluent
      record
    end
 
+   def legacy_schema_config_deprecation
+     if [@field_string, @field_integer, @field_float, @field_boolean, @field_timestamp].any?
+       warn "[DEPRECATION] `field_*` style schema config is deprecated. Instead of it, use `schema` config params that is array of json style."
+     end
+   end
+
    def write(chunk)
      table_id_format = @tables_mutex.synchronize do
        t = @tables_queue.shift
@@ -455,14 +468,9 @@ module Fluent
    def load(chunk, table_id)
      res = nil
 
-     if @prevent_duplicate_load
-       job_id = create_job_id(chunk, @dataset, table_id, @fields.to_a, @max_bad_records, @ignore_unknown_values)
-     else
-       job_id = nil
-     end
-
      create_upload_source(chunk) do |upload_source|
-       res = writer.create_load_job(@project, @dataset, table_id, upload_source,
+       res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields, {
+         prevent_duplicate_load: @prevent_duplicate_load,
          ignore_unknown_values: @ignore_unknown_values, max_bad_records: @max_bad_records,
          timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec, auto_create_table: @auto_create_table,
          time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration
@@ -494,10 +502,6 @@ module Fluent
        end
      end
    end
-
-   def create_job_id(chunk, dataset, table, schema, max_bad_records, ignore_unknown_values)
-     "fluentd_job_" + Digest::SHA1.hexdigest("#{chunk.unique_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}")
-   end
   end
  end
 end
data/test/plugin/test_out_bigquery.rb
CHANGED
@@ -22,16 +22,31 @@ class BigQueryOutputTest < Test::Unit::TestCase
     time_format %s
     time_field  time
 
-
-
-
-
+    schema [
+      {"name": "time", "type": "INTEGER"},
+      {"name": "status", "type": "INTEGER"},
+      {"name": "bytes", "type": "INTEGER"},
+      {"name": "vhost", "type": "STRING"},
+      {"name": "path", "type": "STRING"},
+      {"name": "method", "type": "STRING"},
+      {"name": "protocol", "type": "STRING"},
+      {"name": "agent", "type": "STRING"},
+      {"name": "referer", "type": "STRING"},
+      {"name": "remote", "type": "RECORD", "fields": [
+        {"name": "host", "type": "STRING"},
+        {"name": "ip", "type": "STRING"},
+        {"name": "user", "type": "STRING"}
+      ]},
+      {"name": "requesttime", "type": "FLOAT"},
+      {"name": "bot_access", "type": "BOOLEAN"},
+      {"name": "loginsession", "type": "BOOLEAN"}
+    ]
   ]
 
   API_SCOPE = "https://www.googleapis.com/auth/bigquery"
 
   def create_driver(conf = CONFIG)
-    Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf)
+    Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf, true)
   end
 
   def stub_writer(driver)
@@ -91,7 +106,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
       auth_method compute_engine
       project yourproject_id
       dataset yourdataset_id
-
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
     driver.instance.writer
@@ -114,7 +133,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
       json_key #{json_key_path}
       project yourproject_id
       dataset yourdataset_id
-
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
     driver.instance.writer
@@ -134,7 +157,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
       json_key #{json_key_path}
       project yourproject_id
       dataset yourdataset_id
-
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     assert_raises(Errno::EACCES) do
       driver.instance.writer.client
@@ -147,9 +174,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
   def test_configure_auth_json_key_as_string
     json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
     json_key_io = StringIO.new(json_key)
-    mock(StringIO).new(json_key) { json_key_io }
     authorization = Object.new
-    mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: json_key_io, scope: API_SCOPE) { authorization }
+    mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == JSON.parse(json_key_io.read) }, scope: API_SCOPE) { authorization }
 
     mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
       mock(cl).__send__(:authorization=, authorization) {}
@@ -162,7 +188,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
       json_key #{json_key}
       project yourproject_id
       dataset yourdataset_id
-
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
     driver.instance.writer
@@ -183,7 +213,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
       auth_method application_default
       project yourproject_id
       dataset yourdataset_id
-
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
 
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
@@ -191,186 +225,6 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
-  def test_configure_fieldname_stripped
-    driver = create_driver(%[
-      table foo
-      email foo@bar.example
-      private_key_path /path/to/key
-      project yourproject_id
-      dataset yourdataset_id
-
-      time_format %s
-      time_field  time
-
-      field_integer time , status , bytes
-      field_string  _log_name, vhost, path, method, protocol, agent, referer, remote.host, remote.ip, remote.user
-      field_float   requesttime
-      field_boolean bot_access , loginsession
-    ])
-    fields = driver.instance.instance_eval{ @fields }
-
-    assert (not fields['time ']), "tailing spaces must be stripped"
-    assert fields['time']
-    assert fields['status']
-    assert fields['bytes']
-    assert fields['_log_name']
-    assert fields['vhost']
-    assert fields['protocol']
-    assert fields['agent']
-    assert fields['referer']
-    assert fields['remote']['host']
-    assert fields['remote']['ip']
-    assert fields['remote']['user']
-    assert fields['requesttime']
-    assert fields['bot_access']
-    assert fields['loginsession']
-  end
-
-  def test_configure_invalid_fieldname
-    base = %[
-      table foo
-      email foo@bar.example
-      private_key_path /path/to/key
-      project yourproject_id
-      dataset yourdataset_id
-
-      time_format %s
-      time_field  time
-    ]
-
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_integer time field\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string my name\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string remote.host name\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string 1column\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string #{'tenstrings' * 12 + '123456789'}\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_float request time\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_boolean login session\n")
-    end
-  end
-
-  def test_format_stream
-    now = Time.now
-    input = [
-      now,
-      {
-        "status" => "1",
-        "bytes" => 3.0,
-        "vhost" => :bar,
-        "path" => "/path/to/baz",
-        "method" => "GET",
-        "protocol" => "HTTP/0.9",
-        "agent" => "libwww",
-        "referer" => "http://referer.example",
-        "requesttime" => (now - 1).to_f.to_s,
-        "bot_access" => true,
-        "loginsession" => false,
-        "something-else" => "would be ignored",
-        "yet-another" => {
-          "foo" => "bar",
-          "baz" => 1,
-        },
-        "remote" => {
-          "host" => "remote.example",
-          "ip" => "192.0.2.1",
-          "port" => 12345,
-          "user" => "tagomoris",
-        }
-      }
-    ]
-    expected = {
-      "json" => {
-        "time" => now.to_i,
-        "status" => 1,
-        "bytes" => 3,
-        "vhost" => "bar",
-        "path" => "/path/to/baz",
-        "method" => "GET",
-        "protocol" => "HTTP/0.9",
-        "agent" => "libwww",
-        "referer" => "http://referer.example",
-        "requesttime" => (now - 1).to_f.to_s.to_f,
-        "bot_access" => true,
-        "loginsession" => false,
-        "something-else" => "would be ignored",
-        "yet-another" => {
-          "foo" => "bar",
-          "baz" => 1,
-        },
-        "remote" => {
-          "host" => "remote.example",
-          "ip" => "192.0.2.1",
-          "port" => 12345,
-          "user" => "tagomoris",
-        }
-      }
-    }
-
-    driver = create_driver(CONFIG)
-    driver.instance.start
-    buf = driver.instance.format_stream("my.tag", [input])
-    driver.instance.shutdown
-
-    assert_equal expected, MessagePack.unpack(buf)
-  end
-
-  [
-    # <time_format>, <time field type>, <time expectation generator>, <assertion>
-    [
-      "%s.%6N", "field_float",
-      lambda{|t| t.strftime("%s.%6N").to_f },
-      lambda{|recv, expected, actual|
-        recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
-      }
-    ],
-    [
-      "%Y-%m-%dT%H:%M:%SZ", "field_string",
-      lambda{|t| t.iso8601 },
-      :assert_equal.to_proc
-    ],
-    [
-      "%a, %d %b %Y %H:%M:%S GMT", "field_string",
-      lambda{|t| t.httpdate },
-      :assert_equal.to_proc
-    ],
-  ].each do |format, type, expect_time, assert|
-    define_method("test_time_formats_#{format}") do
-      now = Time.now.utc
-      input = [ now, {} ]
-      expected = { "json" => { "time" => expect_time[now], } }
-
-      driver = create_driver(<<-CONFIG)
-        table foo
-        email foo@bar.example
-        private_key_path /path/to/key
-        project yourproject_id
-        dataset yourdataset_id
-
-        time_format #{format}
-        time_field  time
-        #{type} time
-      CONFIG
-
-      driver.instance.start
-      buf = driver.instance.format_stream("my.tag", [input])
-      driver.instance.shutdown
-
-      assert[self, expected["json"]["time"], MessagePack.unpack(buf)["json"]["time"]]
-    end
-  end
-
   def test_format_nested_time
     now = Time.now
     input = [
@@ -402,8 +256,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_format %s
       time_field  metadata.time
 
-
-
+      schema [
+        {"name": "metadata", "type": "RECORD", "fields": [
+          {"name": "time", "type": "INTEGER"},
+          {"name": "node", "type": "STRING"}
+        ]},
+        {"name": "log", "type": "STRING"}
+      ]
     CONFIG
 
     driver.instance.start
@@ -489,7 +348,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field  time
 
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
-
+      schema [{"name": "time", "type": "INTEGER"}]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -529,7 +388,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field  time
 
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
-
+      schema [{"name": "time", "type": "INTEGER"}]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -569,7 +428,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field  time
 
       fetch_schema true
-
+      schema [{"name": "time", "type": "INTEGER"}]
     CONFIG
 
     writer = stub_writer(driver)
@@ -635,7 +494,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field  time
 
       fetch_schema true
-
+      schema [{"name": "time", "type": "INTEGER"}]
     CONFIG
 
     writer = stub_writer(driver)
@@ -693,7 +552,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       dataset yourdataset_id
 
       insert_id_field uuid
-
+      schema [{"name": "uuid", "type": "STRING"}]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -729,7 +588,9 @@ class BigQueryOutputTest < Test::Unit::TestCase
       dataset yourdataset_id
 
       insert_id_field data.uuid
-
+      schema [{"name": "data", "type": "RECORD", "fields": [
+        {"name": "uuid", "type": "STRING"}
+      ]}]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -758,7 +619,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       project yourproject_id
       dataset yourdataset_id
 
-
+      schema [{"name": "uuid", "type": "STRING"}]
 
       buffer_type memory
     CONFIG
@@ -803,9 +664,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_format %s
       time_field  time
 
-
-
-
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "refere", "type": "STRING"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "login_session", "type": "BOOLEAN"}
+      ]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -854,9 +719,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_format %s
       time_field  time
 
-
-
-
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "refere", "type": "STRING"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "loginsession", "type": "BOOLEAN"}
+      ]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -906,10 +775,25 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_format %s
       time_field  time
 
-
-
-
-
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "path", "type": "STRING"},
+        {"name": "method", "type": "STRING"},
+        {"name": "protocol", "type": "STRING"},
+        {"name": "agent", "type": "STRING"},
+        {"name": "referer", "type": "STRING"},
+        {"name": "remote", "type": "RECORD", "fields": [
+          {"name": "host", "type": "STRING"},
+          {"name": "ip", "type": "STRING"},
+          {"name": "user", "type": "STRING"}
+        ]},
+        {"name": "requesttime", "type": "FLOAT"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "loginsession", "type": "BOOLEAN"}
+      ]
     <secondary>
       type file
       path error
@@ -951,10 +835,25 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_format %s
       time_field  time
 
-
-
-
-
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "path", "type": "STRING"},
+        {"name": "method", "type": "STRING"},
+        {"name": "protocol", "type": "STRING"},
+        {"name": "agent", "type": "STRING"},
+        {"name": "referer", "type": "STRING"},
+        {"name": "remote", "type": "RECORD", "fields": [
+          {"name": "host", "type": "STRING"},
+          {"name": "ip", "type": "STRING"},
+          {"name": "user", "type": "STRING"}
+        ]},
+        {"name": "requesttime", "type": "FLOAT"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "loginsession", "type": "BOOLEAN"}
+      ]
     <secondary>
       type file
      path error
@@ -1002,20 +901,16 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field  time
 
       schema_path #{schema_path}
-      field_integer time
 
       buffer_type memory
     CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
     writer = stub_writer(driver)
     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     io = StringIO.new("hello")
     mock(driver.instance).create_upload_source(chunk).yields(io)
-    mock(writer).wait_load_job("yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
+    mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
     mock(writer.client).insert_job('yourproject_id', {
       configuration: {
         load: {
@@ -1065,22 +960,17 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field  time
 
       schema_path #{schema_path}
-      field_integer time
       prevent_duplicate_load true
 
       buffer_type memory
     CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     io = StringIO.new("hello")
     mock(driver.instance).create_upload_source(chunk).yields(io)
-    mock.proxy(driver.instance).create_job_id(duck_type(:unique_id), "yourdataset_id", "foo", driver.instance.instance_variable_get(:@fields).to_a, 0, false)
     writer = stub_writer(driver)
-    mock(writer).wait_load_job("yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
+    mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
     mock(writer.client).insert_job('yourproject_id', {
       configuration: {
         load: {
@@ -1131,14 +1021,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field  time
 
      schema_path #{schema_path}
-      field_integer time
 
       buffer_type memory
     CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     io = StringIO.new("hello")
@@ -1209,7 +1095,6 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field  time
 
       schema_path #{schema_path}
-      field_integer time
 
       buffer_type memory
     <secondary>
@@ -1218,10 +1103,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       utc
     </secondary>
     CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     io = StringIO.new("hello")
data/test/plugin/test_record_schema.rb
CHANGED
@@ -154,6 +154,23 @@ class RecordSchemaTest < Test::Unit::TestCase
     )
   end
 
+  def test_format_one_convert_array_or_hash_to_json
+    fields = Fluent::BigQuery::RecordSchema.new("record")
+    fields.load_schema(base_schema, false)
+
+    time = Time.local(2016, 2, 7, 19, 0, 0).utc
+
+    formatted = fields.format_one({
+      "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42]
+    })
+    assert_equal(
+      formatted,
+      {
+        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"]
+      }
+    )
+  end
+
   def test_format_one_with_extra_column
     fields = Fluent::BigQuery::RecordSchema.new("record")
     fields.load_schema(base_schema, false)
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.3.4
+  version: 0.4.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2017-01-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake