fluent-plugin-bigquery 0.3.4 → 0.4.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3b4f46435678df7e2b883e2dfdbb0a77a9359481
-  data.tar.gz: 7cbe9b3aa6e4c22ccf0299bfd0a8ff8d1892ccff
+  metadata.gz: 7d4074dc903c423acbebd56b2b4d6fc0ce110510
+  data.tar.gz: 4d17cd1b2ee3768b83845105b5b9a714835e0a4c
 SHA512:
-  metadata.gz: b3e02368662e2c7448726d9d3652aa80d60cd19a040122cfdd05aed6b36949e2c5b760cc3812f690633e4ad3685fd01da14eacab65066caa18e754b8432e7dde
-  data.tar.gz: b9d5c81042fe958b6230d9ebffbf6c09526d7f7bdb0b2641667e2c7c34ab70418d03542a23bb90bff23e565315f7c1e6817ba792b1e622d94d015bad6eea4ef0
+  metadata.gz: 7f99c64e394650b7eac03e6872dcfafb36981f48a726d8aba9d87fc83b45329ebac925c7d1239113995597e4697d8afcf1f9397c8583d0a3bbe11d47aedd668b
+  data.tar.gz: 52554bcd622e75486fc8a10ceeebd8af958ac5523869f2ae964324c1348b734fcef00c4232766484a0d3112c15b50eb06f334b6efaf2cd55394321139bc1df9e
data/README.md CHANGED
@@ -21,47 +21,48 @@ If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
 
 ### Options
 
-| name | type | required? | default | description |
-| :------------------------------------- | :------------ | :----------- | :------------------------- | :----------------------- |
-| method | string | no | insert | `insert` (Streaming Insert) or `load` (load job) |
-| buffer_type | string | no | lightening (insert) or file (load) | |
-| buffer_chunk_limit | integer | no | 1MB (insert) or 1GB (load) | |
-| buffer_queue_limit | integer | no | 1024 (insert) or 32 (load) | |
-| buffer_chunk_records_limit | integer | no | 500 | |
-| flush_interval | float | no | 0.25 (*insert) or default of time sliced output (load) | |
-| try_flush_interval | float | no | 0.05 (*insert) or default of time sliced output (load) | |
-| auth_method | enum | yes | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
-| email | string | yes (private_key) | nil | GCP Service Account Email |
-| private_key_path | string | yes (private_key) | nil | GCP Private Key file path |
-| private_key_passphrase | string | yes (private_key) | nil | GCP Private Key Passphrase |
-| json_key | string | yes (json_key) | nil | GCP JSON Key file path or JSON Key string |
-| project | string | yes | nil | |
-| table | string | yes (either `tables`) | nil | |
-| tables | string | yes (either `table`) | nil | can set multi table names splitted by `,` |
-| template_suffix | string | no | nil | can use `%{time_slice}` placeholder replaced by `time_slice_format` |
-| auto_create_table | bool | no | false | If true, creates table automatically |
-| skip_invalid_rows | bool | no | false | Only `insert` method. |
-| max_bad_records | integer | no | 0 | Only `load` method. If the number of bad records exceeds this value, an invalid error is returned in the job result. |
-| ignore_unknown_values | bool | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
-| schema_path | string | yes (either `fetch_schema`) | nil | Schema Definition file path. It is formatted by JSON. |
-| fetch_schema | bool | yes (either `schema_path`) | false | If true, fetch table schema definition from Bigquery table automatically. |
-| fetch_schema_table | string | no | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
-| schema_cache_expire | integer | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
-| field_string | string | no | nil | see examples. |
-| field_integer | string | no | nil | see examples. |
-| field_float | string | no | nil | see examples. |
-| field_boolean | string | no | nil | see examples. |
-| field_timestamp | string | no | nil | see examples. |
-| time_field | string | no | nil | If this param is set, plugin set formatted time string to this field. |
-| time_format | string | no | nil | ex. `%s`, `%Y/%m%d %H:%M:%S` |
-| replace_record_key | bool | no | false | see examples. |
-| replace_record_key_regexp{1-10} | string | no | nil | see examples. |
-| convert_hash_to_json | bool | no | false | If true, converts Hash value of record to JSON String. |
-| insert_id_field | string | no | nil | Use key as `insert_id` of Streaming Insert API parameter. |
-| request_timeout_sec | integer | no | nil | Bigquery API response timeout |
-| request_open_timeout_sec | integer | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
-| time_partitioning_type | enum | no (either day) | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
-| time_partitioning_expiration | time | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+| name | type | required? | default | description |
+| :------------------------------------- | :------------ | :----------- | :------------------------- | :----------------------- |
+| method | string | no | insert | `insert` (Streaming Insert) or `load` (load job) |
+| buffer_type | string | no | lightening (insert) or file (load) | |
+| buffer_chunk_limit | integer | no | 1MB (insert) or 1GB (load) | |
+| buffer_queue_limit | integer | no | 1024 (insert) or 32 (load) | |
+| buffer_chunk_records_limit | integer | no | 500 | |
+| flush_interval | float | no | 0.25 (*insert) or default of time sliced output (load) | |
+| try_flush_interval | float | no | 0.05 (*insert) or default of time sliced output (load) | |
+| auth_method | enum | yes | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+| email | string | yes (private_key) | nil | GCP Service Account Email |
+| private_key_path | string | yes (private_key) | nil | GCP Private Key file path |
+| private_key_passphrase | string | yes (private_key) | nil | GCP Private Key Passphrase |
+| json_key | string | yes (json_key) | nil | GCP JSON Key file path or JSON Key string |
+| project | string | yes | nil | |
+| table | string | yes (either `tables`) | nil | |
+| tables | string | yes (either `table`) | nil | can set multiple table names, split by `,` |
+| template_suffix | string | no | nil | can use `%{time_slice}` placeholder replaced by `time_slice_format` |
+| auto_create_table | bool | no | false | If true, creates the table automatically |
+| skip_invalid_rows | bool | no | false | Only `insert` method. |
+| max_bad_records | integer | no | 0 | Only `load` method. If the number of bad records exceeds this value, an invalid error is returned in the job result. |
+| ignore_unknown_values | bool | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+| schema | array | yes (either `fetch_schema` or `schema_path`) | nil | Schema definition. It is formatted as JSON. |
+| schema_path | string | yes (either `fetch_schema`) | nil | Schema definition file path. It is formatted as JSON. |
+| fetch_schema | bool | yes (either `schema_path`) | false | If true, fetches the table schema definition from the BigQuery table automatically. |
+| fetch_schema_table | string | no | nil | If set, fetches the table schema definition from this table. If fetch_schema is false, this param is ignored. |
+| schema_cache_expire | integer | no | 600 | Value is in seconds. If the current time is after the expiration interval, the table schema definition is re-fetched. |
+| field_string (deprecated) | string | no | nil | see examples. |
+| field_integer (deprecated) | string | no | nil | see examples. |
+| field_float (deprecated) | string | no | nil | see examples. |
+| field_boolean (deprecated) | string | no | nil | see examples. |
+| field_timestamp (deprecated) | string | no | nil | see examples. |
+| time_field | string | no | nil | If this param is set, the plugin sets a formatted time string to this field. |
+| time_format | string | no | nil | ex. `%s`, `%Y/%m%d %H:%M:%S` |
+| replace_record_key | bool | no | false | see examples. |
+| replace_record_key_regexp{1-10} | string | no | nil | see examples. |
+| convert_hash_to_json (deprecated) | bool | no | false | If true, converts Hash values in the record to JSON strings. |
+| insert_id_field | string | no | nil | Use key as `insert_id` of Streaming Insert API parameter. |
+| request_timeout_sec | integer | no | nil | BigQuery API response timeout |
+| request_open_timeout_sec | integer | no | 60 | BigQuery API connection and request timeout. If you send big data to BigQuery, set a large value. |
+| time_partitioning_type | enum | no (`day` only) | nil | Type of BigQuery time partitioning feature (experimental feature on BigQuery). |
+| time_partitioning_expiration | time | no | nil | Expiration milliseconds for BigQuery time partitioning (experimental feature on BigQuery). |
 
 ### Standard Options
 
@@ -96,10 +97,25 @@ Configure insert specifications with target table schema, with your credentials.
   time_format %s
   time_field time
 
-  field_integer time,status,bytes
-  field_string rhost,vhost,path,method,protocol,agent,referer
-  field_float requesttime
-  field_boolean bot_access,loginsession
+  schema [
+    {"name": "time", "type": "INTEGER"},
+    {"name": "status", "type": "INTEGER"},
+    {"name": "bytes", "type": "INTEGER"},
+    {"name": "vhost", "type": "STRING"},
+    {"name": "path", "type": "STRING"},
+    {"name": "method", "type": "STRING"},
+    {"name": "protocol", "type": "STRING"},
+    {"name": "agent", "type": "STRING"},
+    {"name": "referer", "type": "STRING"},
+    {"name": "remote", "type": "RECORD", "fields": [
+      {"name": "host", "type": "STRING"},
+      {"name": "ip", "type": "STRING"},
+      {"name": "user", "type": "STRING"}
+    ]},
+    {"name": "requesttime", "type": "FLOAT"},
+    {"name": "bot_access", "type": "BOOLEAN"},
+    {"name": "loginsession", "type": "BOOLEAN"}
+  ]
 </match>
 ```
 
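For migration sanity checks, here is a minimal standalone Ruby sketch (not part of the plugin; the `my.schema` file name and the `validate_schema!` helper are made up for illustration) that verifies a `schema` array like the one above before it goes into fluent.conf. The accepted type list mirrors the FIELD_TYPES additions later in this diff.

```ruby
require 'json'

# Types accepted by the plugin's RecordSchema as of 0.4.0
# (string/integer/float/boolean/timestamp/date/datetime/time/record).
VALID_TYPES = %w[STRING INTEGER FLOAT BOOLEAN TIMESTAMP DATE DATETIME TIME RECORD].freeze

def validate_schema!(fields, path = [])
  fields.each do |field|
    name = field.fetch("name") { raise "field without a name under #{path.join('.')}" }
    type = field.fetch("type") { raise "#{name}: missing type" }.upcase
    raise "#{name}: unknown type #{type}" unless VALID_TYPES.include?(type)
    # RECORD columns carry their children in a nested "fields" array.
    validate_schema!(field.fetch("fields"), path + [name]) if type == "RECORD"
  end
end

validate_schema!(JSON.parse(File.read("my.schema")))
puts "schema looks structurally valid"
```
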
@@ -130,10 +146,25 @@ For high rate inserts over streaming inserts, you should specify flush intervals
   time_format %s
   time_field time
 
-  field_integer time,status,bytes
-  field_string rhost,vhost,path,method,protocol,agent,referer
-  field_float requesttime
-  field_boolean bot_access,loginsession
+  schema [
+    {"name": "time", "type": "INTEGER"},
+    {"name": "status", "type": "INTEGER"},
+    {"name": "bytes", "type": "INTEGER"},
+    {"name": "vhost", "type": "STRING"},
+    {"name": "path", "type": "STRING"},
+    {"name": "method", "type": "STRING"},
+    {"name": "protocol", "type": "STRING"},
+    {"name": "agent", "type": "STRING"},
+    {"name": "referer", "type": "STRING"},
+    {"name": "remote", "type": "RECORD", "fields": [
+      {"name": "host", "type": "STRING"},
+      {"name": "ip", "type": "STRING"},
+      {"name": "user", "type": "STRING"}
+    ]},
+    {"name": "requesttime", "type": "FLOAT"},
+    {"name": "bot_access", "type": "BOOLEAN"},
+    {"name": "loginsession", "type": "BOOLEAN"}
+  ]
 </match>
 ```
 
@@ -266,11 +297,7 @@ Compute Engine instance, then you can configure fluentd like this.
 
   time_format %s
   time_field time
-
-  field_integer time,status,bytes
-  field_string rhost,vhost,path,method,protocol,agent,referer
-  field_float requesttime
-  field_boolean bot_access,loginsession
+  ...
 </match>
 ```
 
@@ -419,10 +446,25 @@ you can also specify nested fields by prefixing their belonging record fields.
   time_format %s
   time_field time
 
-  field_integer time,response.status,response.bytes
-  field_string request.vhost,request.path,request.method,request.protocol,request.agent,request.referer,remote.host,remote.ip,remote.user
-  field_float request.time
-  field_boolean request.bot_access,request.loginsession
+  schema [
+    {"name": "time", "type": "INTEGER"},
+    {"name": "status", "type": "INTEGER"},
+    {"name": "bytes", "type": "INTEGER"},
+    {"name": "vhost", "type": "STRING"},
+    {"name": "path", "type": "STRING"},
+    {"name": "method", "type": "STRING"},
+    {"name": "protocol", "type": "STRING"},
+    {"name": "agent", "type": "STRING"},
+    {"name": "referer", "type": "STRING"},
+    {"name": "remote", "type": "RECORD", "fields": [
+      {"name": "host", "type": "STRING"},
+      {"name": "ip", "type": "STRING"},
+      {"name": "user", "type": "STRING"}
+    ]},
+    {"name": "requesttime", "type": "FLOAT"},
+    {"name": "bot_access", "type": "BOOLEAN"},
+    {"name": "loginsession", "type": "BOOLEAN"}
+  ]
 </match>
 ```
 
@@ -459,10 +501,9 @@ The second method is to specify a path to a BigQuery schema file instead of list
   time_field time
 
   schema_path /path/to/httpd.schema
-  field_integer time
 </match>
 ```
-where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery.
+where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using an external schema file you are able to write a full schema that supports NULLABLE/REQUIRED/REPEATED; this feature is really useful and adds full flexibility.
 
 The third method is to set `fetch_schema` to `true` to enable fetch a schema using BigQuery API. In this case, your fluent.conf looks like:
 
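The added sentence above notes that external schema files support NULLABLE/REQUIRED/REPEATED. As a hedged illustration (the field list is invented; the mode values are standard BigQuery), such an httpd.schema file could be generated like this:

```ruby
require 'json'

# Hypothetical example of a full schema with explicit modes.
schema = [
  { "name" => "time",   "type" => "INTEGER", "mode" => "REQUIRED" },
  { "name" => "vhost",  "type" => "STRING",  "mode" => "NULLABLE" },
  { "name" => "tags",   "type" => "STRING",  "mode" => "REPEATED" },
  { "name" => "remote", "type" => "RECORD",  "mode" => "NULLABLE",
    "fields" => [
      { "name" => "host", "type" => "STRING", "mode" => "NULLABLE" }
    ] }
]

File.write("/path/to/httpd.schema", JSON.pretty_generate(schema))
```
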
@@ -477,7 +518,6 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
 
   fetch_schema true
   # fetch_schema_table other_table # if you want to fetch schema from other table
-  field_integer time
 </match>
 ```
 
@@ -498,17 +538,14 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p
   ...
 
   insert_id_field uuid
-  field_string uuid
+  schema [{"name": "uuid", "type": "STRING"}]
 </match>
 ```
 
 ## TODO
 
-* support optional data fields
-* support NULLABLE/REQUIRED/REPEATED field options in field list style of configuration
 * OAuth installed application credentials support
 * Google API discovery expiration
-* Error classes
 * check row size limits
 
 ## Authors
lib/fluent/plugin/bigquery/schema.rb CHANGED
@@ -1,3 +1,5 @@
+require 'multi_json'
+
 module Fluent
   module BigQuery
     class FieldSchema
@@ -56,7 +58,11 @@ module Fluent
       end
 
       def format_one(value)
-        value.to_s
+        if value.is_a?(Hash) || value.is_a?(Array)
+          MultiJson.dump(value)
+        else
+          value.to_s
+        end
       end
     end
 
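The hunk above makes string fields serialize Hash and Array values as JSON instead of calling `to_s` (which yields inspect-style output such as `{"a"=>1}`). A self-contained sketch of the same logic, using stdlib JSON in place of the plugin's MultiJson:

```ruby
require 'json'

# Mirrors the new StringFieldSchema#format_one behavior shown above.
def format_string_field(value)
  if value.is_a?(Hash) || value.is_a?(Array)
    JSON.generate(value)  # serialize structured values to JSON text
  else
    value.to_s            # everything else keeps the old behavior
  end
end

puts format_string_field({ "a" => 1 })  # => {"a":1}
puts format_string_field([1, 2, 3])     # => [1,2,3]
puts format_string_field(:symbol)       # => symbol
```
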
@@ -116,6 +122,48 @@ module Fluent
       end
     end
 
+    class DateFieldSchema < FieldSchema
+      def type
+        :date
+      end
+
+      def format_one(value)
+        if value.respond_to?(:strftime)
+          value.strftime("%Y-%m-%d")
+        else
+          value
+        end
+      end
+    end
+
+    class DateTimeFieldSchema < FieldSchema
+      def type
+        :datetime
+      end
+
+      def format_one(value)
+        if value.respond_to?(:strftime)
+          value.strftime("%Y-%m-%dT%H:%M:%S.%6N")
+        else
+          value
+        end
+      end
+    end
+
+    class TimeFieldSchema < FieldSchema
+      def type
+        :time
+      end
+
+      def format_one(value)
+        if value.respond_to?(:strftime)
+          value.strftime("%H:%M:%S.%6N")
+        else
+          value
+        end
+      end
+    end
+
     class RecordSchema < FieldSchema
       FIELD_TYPES = {
         string: StringFieldSchema,
@@ -123,6 +171,9 @@ module Fluent
         float: FloatFieldSchema,
         boolean: BooleanFieldSchema,
         timestamp: TimestampFieldSchema,
+        date: DateFieldSchema,
+        datetime: DateTimeFieldSchema,
+        time: TimeFieldSchema,
         record: RecordSchema
       }.freeze
 
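To see what the new DATE/DATETIME/TIME types emit, here is a small sketch that reproduces the strftime patterns above directly, without calling the plugin classes:

```ruby
require 'time'

t = Time.utc(2017, 1, 30, 12, 34, 56, 789_000)  # last argument is microseconds

puts t.strftime("%Y-%m-%d")               # DATE     => 2017-01-30
puts t.strftime("%Y-%m-%dT%H:%M:%S.%6N")  # DATETIME => 2017-01-30T12:34:56.789000
puts t.strftime("%H:%M:%S.%6N")           # TIME     => 12:34:56.789000
```
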
lib/fluent/plugin/bigquery/version.rb CHANGED
@@ -1,5 +1,5 @@
 module Fluent
   module BigQueryPlugin
-    VERSION = "0.3.4".freeze
+    VERSION = "0.4.0".freeze
   end
 end
lib/fluent/plugin/bigquery/writer.rb CHANGED
@@ -6,6 +6,7 @@ module Fluent
       @scope = "https://www.googleapis.com/auth/bigquery"
       @auth_options = auth_options
       @log = log
+      @num_errors_per_chunk = {}
 
       @cached_client_expiration = Time.now + 1800
     end
@@ -104,7 +105,7 @@ module Fluent
         raise Fluent::BigQuery::Error.wrap(e)
       end
 
-      def create_load_job(project, dataset, table_id, upload_source, job_id, fields, ignore_unknown_values: false, max_bad_records: 0, timeout_sec: nil, open_timeout_sec: 60, auto_create_table: nil, time_partitioning_type: nil, time_partitioning_expiration: nil)
+      def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields, prevent_duplicate_load: false, ignore_unknown_values: false, max_bad_records: 0, timeout_sec: nil, open_timeout_sec: 60, auto_create_table: nil, time_partitioning_type: nil, time_partitioning_expiration: nil)
         configuration = {
           configuration: {
             load: {
@@ -123,6 +124,8 @@ module Fluent
             }
           }
         }
+
+        job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a, max_bad_records, ignore_unknown_values) if prevent_duplicate_load
         configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if time_partitioning_type
         configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
 
@@ -148,7 +151,8 @@ module Fluent
             }
          }
         )
-        wait_load_job(project, dataset, res.job_reference.job_id, table_id)
+        wait_load_job(chunk_id, project, dataset, res.job_reference.job_id, table_id)
+        @num_errors_per_chunk.delete(chunk_id)
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
         @client = nil
 
@@ -161,12 +165,16 @@ module Fluent
           raise "table created. send rows next time."
         end
 
-        return wait_load_job(project, dataset, job_id, table_id) if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
+        if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
+          wait_load_job(chunk_id, project, dataset, job_id, table_id)
+          @num_errors_per_chunk.delete(chunk_id)
+          return
+        end
 
         raise Fluent::BigQuery::Error.wrap(e)
       end
 
-      def wait_load_job(project, dataset, job_id, table_id, retryable: true)
+      def wait_load_job(chunk_id, project, dataset, job_id, table_id)
         wait_interval = 10
         _response = client.get_job(project, job_id)
 
@@ -186,9 +194,11 @@ module Fluent
         error_result = _response.status.error_result
         if error_result
           log.error "job.insert API (result)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: error_result.message, reason: error_result.reason
-          if retryable && Fluent::BigQuery::Error.retryable_error_reason?(error_result.reason)
+          if Fluent::BigQuery::Error.retryable_error_reason?(error_result.reason)
+            @num_errors_per_chunk[chunk_id] = @num_errors_per_chunk[chunk_id].to_i + 1
             raise Fluent::BigQuery::RetryableError.new("failed to load into bigquery, retry")
           else
+            @num_errors_per_chunk.delete(chunk_id)
             raise Fluent::BigQuery::UnRetryableError.new("failed to load into bigquery, and cannot retry")
           end
         end
@@ -259,6 +269,12 @@ module Fluent
       def safe_table_id(table_id)
        table_id.gsub(/\$\d+$/, "")
       end
+
+      def create_job_id(chunk_id, dataset, table, schema, max_bad_records, ignore_unknown_values)
+        job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}#{@num_errors_per_chunk[chunk_id]}"
+        @log.debug "job_id_key: #{job_id_key}"
+        "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
+      end
     end
   end
 end
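
The new `create_job_id` folds the per-chunk retry count into the digest: re-sending an identical, never-failed chunk reproduces the same job id (so BigQuery rejects the duplicate load with 409, which is then treated as success), while a chunk that failed retryably gets a fresh id on its next attempt. A simplified sketch of that idea (the option fields are trimmed for brevity):

```ruby
require 'digest/sha1'

# Deterministic id per (chunk, destination, retry count).
def job_id_for(chunk_id, dataset, table, schema, num_errors)
  "fluentd_job_" + Digest::SHA1.hexdigest("#{chunk_id}#{dataset}#{table}#{schema}#{num_errors}")
end

first       = job_id_for("abc", "ds", "t", "[...]", 0)
resend      = job_id_for("abc", "ds", "t", "[...]", 0)
after_retry = job_id_for("abc", "ds", "t", "[...]", 1)

puts first == resend       # => true  (duplicate load is deduplicated)
puts first == after_retry  # => false (retry submits a brand-new job)
```
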
lib/fluent/plugin/out_bigquery.rb CHANGED
@@ -87,6 +87,7 @@ module Fluent
     # Default is false, which treats unknown values as errors.
     config_param :ignore_unknown_values, :bool, default: false
 
+    config_param :schema, :array, default: nil
     config_param :schema_path, :string, default: nil
     config_param :fetch_schema, :bool, default: false
     config_param :fetch_schema_table, :string, default: nil
@@ -213,7 +214,11 @@ module Fluent
 
       @tablelist = @tables ? @tables.split(',') : [@table]
 
+      legacy_schema_config_deprecation
       @fields = Fluent::BigQuery::RecordSchema.new('record')
+      if @schema
+        @fields.load_schema(@schema)
+      end
       if @schema_path
         @fields.load_schema(MultiJson.load(File.read(@schema_path)))
       end
@@ -259,6 +264,8 @@ module Fluent
       else
         @get_insert_id = nil
       end
+
+      warn "[DEPRECATION] `convert_hash_to_json` param is deprecated. If Hash value is inserted string field, plugin convert it to json automatically." if @convert_hash_to_json
     end
 
     def start
@@ -329,6 +336,12 @@ module Fluent
       record
     end
 
+    def legacy_schema_config_deprecation
+      if [@field_string, @field_integer, @field_float, @field_boolean, @field_timestamp].any?
+        warn "[DEPRECATION] `field_*` style schema config is deprecated. Instead of it, use `schema` config params that is array of json style."
+      end
+    end
+
     def write(chunk)
       table_id_format = @tables_mutex.synchronize do
         t = @tables_queue.shift
@@ -455,14 +468,9 @@ module Fluent
       def load(chunk, table_id)
         res = nil
 
-        if @prevent_duplicate_load
-          job_id = create_job_id(chunk, @dataset, table_id, @fields.to_a, @max_bad_records, @ignore_unknown_values)
-        else
-          job_id = nil
-        end
-
         create_upload_source(chunk) do |upload_source|
-          res = writer.create_load_job(@project, @dataset, table_id, upload_source, job_id, @fields, {
+          res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields, {
+            prevent_duplicate_load: @prevent_duplicate_load,
             ignore_unknown_values: @ignore_unknown_values, max_bad_records: @max_bad_records,
             timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec, auto_create_table: @auto_create_table,
             time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration
@@ -494,10 +502,6 @@ module Fluent
           end
         end
       end
-
-      def create_job_id(chunk, dataset, table, schema, max_bad_records, ignore_unknown_values)
-        "fluentd_job_" + Digest::SHA1.hexdigest("#{chunk.unique_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}")
-      end
     end
   end
 end
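
With `field_*` deprecated in favor of `schema`, a hypothetical migration helper (not shipped with the plugin) could translate old config lines mechanically; nested `a.b` names would need RECORD handling that this sketch omits:

```ruby
require 'json'

# Map from deprecated config keys to BigQuery types.
LEGACY_TYPES = {
  "field_string"    => "STRING",
  "field_integer"   => "INTEGER",
  "field_float"     => "FLOAT",
  "field_boolean"   => "BOOLEAN",
  "field_timestamp" => "TIMESTAMP",
}.freeze

def legacy_to_schema(lines)
  lines.flat_map do |line|
    key, names = line.split(/\s+/, 2)      # e.g. "field_integer time,status"
    type = LEGACY_TYPES.fetch(key)
    names.split(",").map { |n| { "name" => n.strip, "type" => type } }
  end
end

old_conf = ["field_integer time,status,bytes", "field_float requesttime"]
puts JSON.generate(legacy_to_schema(old_conf))
# => [{"name":"time","type":"INTEGER"},{"name":"status","type":"INTEGER"},
#     {"name":"bytes","type":"INTEGER"},{"name":"requesttime","type":"FLOAT"}]
```
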
test/plugin/test_out_bigquery.rb CHANGED
@@ -22,16 +22,31 @@ class BigQueryOutputTest < Test::Unit::TestCase
     time_format %s
     time_field time
 
-    field_integer time,status,bytes
-    field_string vhost,path,method,protocol,agent,referer,remote.host,remote.ip,remote.user
-    field_float requesttime
-    field_boolean bot_access,loginsession
+    schema [
+      {"name": "time", "type": "INTEGER"},
+      {"name": "status", "type": "INTEGER"},
+      {"name": "bytes", "type": "INTEGER"},
+      {"name": "vhost", "type": "STRING"},
+      {"name": "path", "type": "STRING"},
+      {"name": "method", "type": "STRING"},
+      {"name": "protocol", "type": "STRING"},
+      {"name": "agent", "type": "STRING"},
+      {"name": "referer", "type": "STRING"},
+      {"name": "remote", "type": "RECORD", "fields": [
+        {"name": "host", "type": "STRING"},
+        {"name": "ip", "type": "STRING"},
+        {"name": "user", "type": "STRING"}
+      ]},
+      {"name": "requesttime", "type": "FLOAT"},
+      {"name": "bot_access", "type": "BOOLEAN"},
+      {"name": "loginsession", "type": "BOOLEAN"}
+    ]
   ]
 
   API_SCOPE = "https://www.googleapis.com/auth/bigquery"
 
   def create_driver(conf = CONFIG)
-    Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf)
+    Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf, true)
   end
 
   def stub_writer(driver)
@@ -91,7 +106,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
       auth_method compute_engine
       project yourproject_id
       dataset yourdataset_id
-      field_integer time,status,bytes
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
     driver.instance.writer
@@ -114,7 +133,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
      json_key #{json_key_path}
      project yourproject_id
      dataset yourdataset_id
-      field_integer time,status,bytes
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
     driver.instance.writer
@@ -134,7 +157,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
      json_key #{json_key_path}
      project yourproject_id
      dataset yourdataset_id
-      field_integer time,status,bytes
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     assert_raises(Errno::EACCES) do
       driver.instance.writer.client
@@ -147,9 +174,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
   def test_configure_auth_json_key_as_string
     json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
     json_key_io = StringIO.new(json_key)
-    mock(StringIO).new(json_key) { json_key_io }
     authorization = Object.new
-    mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: json_key_io, scope: API_SCOPE) { authorization }
+    mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == JSON.parse(json_key_io.read) }, scope: API_SCOPE) { authorization }
 
     mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
       mock(cl).__send__(:authorization=, authorization) {}
@@ -162,7 +188,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
      json_key #{json_key}
      project yourproject_id
      dataset yourdataset_id
-      field_integer time,status,bytes
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
     driver.instance.writer
@@ -183,7 +213,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
      auth_method application_default
      project yourproject_id
      dataset yourdataset_id
-      field_integer time,status,bytes
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"}
+      ]
     ])
 
     mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
@@ -191,186 +225,6 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
-  def test_configure_fieldname_stripped
-    driver = create_driver(%[
-      table foo
-      email foo@bar.example
-      private_key_path /path/to/key
-      project yourproject_id
-      dataset yourdataset_id
-
-      time_format %s
-      time_field time
-
-      field_integer time , status , bytes
-      field_string _log_name, vhost, path, method, protocol, agent, referer, remote.host, remote.ip, remote.user
-      field_float requesttime
-      field_boolean bot_access , loginsession
-    ])
-    fields = driver.instance.instance_eval{ @fields }
-
-    assert (not fields['time ']), "tailing spaces must be stripped"
-    assert fields['time']
-    assert fields['status']
-    assert fields['bytes']
-    assert fields['_log_name']
-    assert fields['vhost']
-    assert fields['protocol']
-    assert fields['agent']
-    assert fields['referer']
-    assert fields['remote']['host']
-    assert fields['remote']['ip']
-    assert fields['remote']['user']
-    assert fields['requesttime']
-    assert fields['bot_access']
-    assert fields['loginsession']
-  end
-
-  def test_configure_invalid_fieldname
-    base = %[
-      table foo
-      email foo@bar.example
-      private_key_path /path/to/key
-      project yourproject_id
-      dataset yourdataset_id
-
-      time_format %s
-      time_field time
-    ]
-
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_integer time field\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string my name\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string remote.host name\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string 1column\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_string #{'tenstrings' * 12 + '123456789'}\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_float request time\n")
-    end
-    assert_raises(Fluent::ConfigError) do
-      create_driver(base + "field_boolean login session\n")
-    end
-  end
-
-  def test_format_stream
-    now = Time.now
-    input = [
-      now,
-      {
-        "status" => "1",
-        "bytes" => 3.0,
-        "vhost" => :bar,
-        "path" => "/path/to/baz",
-        "method" => "GET",
-        "protocol" => "HTTP/0.9",
-        "agent" => "libwww",
-        "referer" => "http://referer.example",
-        "requesttime" => (now - 1).to_f.to_s,
-        "bot_access" => true,
-        "loginsession" => false,
-        "something-else" => "would be ignored",
-        "yet-another" => {
-          "foo" => "bar",
-          "baz" => 1,
-        },
-        "remote" => {
-          "host" => "remote.example",
-          "ip" => "192.0.2.1",
-          "port" => 12345,
-          "user" => "tagomoris",
-        }
-      }
-    ]
-    expected = {
-      "json" => {
-        "time" => now.to_i,
-        "status" => 1,
-        "bytes" => 3,
-        "vhost" => "bar",
-        "path" => "/path/to/baz",
-        "method" => "GET",
-        "protocol" => "HTTP/0.9",
-        "agent" => "libwww",
-        "referer" => "http://referer.example",
-        "requesttime" => (now - 1).to_f.to_s.to_f,
-        "bot_access" => true,
-        "loginsession" => false,
-        "something-else" => "would be ignored",
-        "yet-another" => {
-          "foo" => "bar",
-          "baz" => 1,
-        },
-        "remote" => {
-          "host" => "remote.example",
-          "ip" => "192.0.2.1",
-          "port" => 12345,
-          "user" => "tagomoris",
-        }
-      }
-    }
-
-    driver = create_driver(CONFIG)
-    driver.instance.start
-    buf = driver.instance.format_stream("my.tag", [input])
-    driver.instance.shutdown
-
-    assert_equal expected, MessagePack.unpack(buf)
-  end
-
-  [
-    # <time_format>, <time field type>, <time expectation generator>, <assertion>
-    [
-      "%s.%6N", "field_float",
-      lambda{|t| t.strftime("%s.%6N").to_f },
-      lambda{|recv, expected, actual|
-        recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
-      }
-    ],
-    [
-      "%Y-%m-%dT%H:%M:%SZ", "field_string",
-      lambda{|t| t.iso8601 },
-      :assert_equal.to_proc
-    ],
-    [
-      "%a, %d %b %Y %H:%M:%S GMT", "field_string",
-      lambda{|t| t.httpdate },
-      :assert_equal.to_proc
-    ],
-  ].each do |format, type, expect_time, assert|
-    define_method("test_time_formats_#{format}") do
-      now = Time.now.utc
-      input = [ now, {} ]
-      expected = { "json" => { "time" => expect_time[now], } }
-
-      driver = create_driver(<<-CONFIG)
-        table foo
-        email foo@bar.example
-        private_key_path /path/to/key
-        project yourproject_id
-        dataset yourdataset_id
-
-        time_format #{format}
-        time_field time
-        #{type} time
-      CONFIG
-
-      driver.instance.start
-      buf = driver.instance.format_stream("my.tag", [input])
-      driver.instance.shutdown
-
-      assert[self, expected["json"]["time"], MessagePack.unpack(buf)["json"]["time"]]
-    end
-  end
-
   def test_format_nested_time
     now = Time.now
     input = [
@@ -402,8 +256,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_format %s
       time_field metadata.time
 
-      field_integer metadata.time
-      field_string metadata.node,log
+      schema [
+        {"name": "metadata", "type": "RECORD", "fields": [
+          {"name": "time", "type": "INTEGER"},
+          {"name": "node", "type": "STRING"}
+        ]},
+        {"name": "log", "type": "STRING"}
+      ]
     CONFIG
 
     driver.instance.start
@@ -489,7 +348,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field time
 
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
-      field_integer time
+      schema [{"name": "time", "type": "INTEGER"}]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -529,7 +388,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field time
 
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
-      field_integer time
+      schema [{"name": "time", "type": "INTEGER"}]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -569,7 +428,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field time
 
       fetch_schema true
-      field_integer time
+      schema [{"name": "time", "type": "INTEGER"}]
     CONFIG
 
     writer = stub_writer(driver)
@@ -635,7 +494,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field time
 
       fetch_schema true
-      field_integer time
+      schema [{"name": "time", "type": "INTEGER"}]
     CONFIG
 
     writer = stub_writer(driver)
@@ -693,7 +552,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       dataset yourdataset_id
 
       insert_id_field uuid
-      field_string uuid
+      schema [{"name": "uuid", "type": "STRING"}]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -729,7 +588,9 @@ class BigQueryOutputTest < Test::Unit::TestCase
       dataset yourdataset_id
 
       insert_id_field data.uuid
-      field_string data.uuid
+      schema [{"name": "data", "type": "RECORD", "fields": [
+        {"name": "uuid", "type": "STRING"}
+      ]}]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -758,7 +619,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
       project yourproject_id
       dataset yourdataset_id
 
-      field_string uuid
+      schema [{"name": "uuid", "type": "STRING"}]
 
       buffer_type memory
     CONFIG
@@ -803,9 +664,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_format %s
       time_field time
 
-      field_integer time
-      field_string vhost, referer
-      field_boolean bot_access, login_session
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "refere", "type": "STRING"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "login_session", "type": "BOOLEAN"}
+      ]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -854,9 +719,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_format %s
       time_field time
 
-      field_integer time
-      field_string vhost, referer, remote
-      field_boolean bot_access, loginsession
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "refere", "type": "STRING"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "loginsession", "type": "BOOLEAN"}
+      ]
     CONFIG
     driver.instance.start
     buf = driver.instance.format_stream("my.tag", [input])
@@ -906,10 +775,25 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_format %s
       time_field time
 
-      field_integer time,status,bytes
-      field_string vhost,path,method,protocol,agent,referer,remote.host,remote.ip,remote.user
-      field_float requesttime
-      field_boolean bot_access,loginsession
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "path", "type": "STRING"},
+        {"name": "method", "type": "STRING"},
+        {"name": "protocol", "type": "STRING"},
+        {"name": "agent", "type": "STRING"},
+        {"name": "referer", "type": "STRING"},
+        {"name": "remote", "type": "RECORD", "fields": [
+          {"name": "host", "type": "STRING"},
+          {"name": "ip", "type": "STRING"},
+          {"name": "user", "type": "STRING"}
+        ]},
+        {"name": "requesttime", "type": "FLOAT"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "loginsession", "type": "BOOLEAN"}
+      ]
       <secondary>
         type file
         path error
@@ -951,10 +835,25 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_format %s
       time_field time
 
-      field_integer time,status,bytes
-      field_string vhost,path,method,protocol,agent,referer,remote.host,remote.ip,remote.user
-      field_float requesttime
-      field_boolean bot_access,loginsession
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "path", "type": "STRING"},
+        {"name": "method", "type": "STRING"},
+        {"name": "protocol", "type": "STRING"},
+        {"name": "agent", "type": "STRING"},
+        {"name": "referer", "type": "STRING"},
+        {"name": "remote", "type": "RECORD", "fields": [
+          {"name": "host", "type": "STRING"},
+          {"name": "ip", "type": "STRING"},
+          {"name": "user", "type": "STRING"}
+        ]},
+        {"name": "requesttime", "type": "FLOAT"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "loginsession", "type": "BOOLEAN"}
+      ]
       <secondary>
         type file
         path error
@@ -1002,20 +901,16 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field time
 
       schema_path #{schema_path}
-      field_integer time
 
       buffer_type memory
     CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
     writer = stub_writer(driver)
     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     io = StringIO.new("hello")
     mock(driver.instance).create_upload_source(chunk).yields(io)
-    mock(writer).wait_load_job("yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
+    mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
     mock(writer.client).insert_job('yourproject_id', {
       configuration: {
         load: {
@@ -1065,22 +960,17 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field time
 
       schema_path #{schema_path}
-      field_integer time
       prevent_duplicate_load true
 
       buffer_type memory
     CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     io = StringIO.new("hello")
     mock(driver.instance).create_upload_source(chunk).yields(io)
-    mock.proxy(driver.instance).create_job_id(duck_type(:unique_id), "yourdataset_id", "foo", driver.instance.instance_variable_get(:@fields).to_a, 0, false)
     writer = stub_writer(driver)
-    mock(writer).wait_load_job("yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
+    mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
     mock(writer.client).insert_job('yourproject_id', {
       configuration: {
         load: {
@@ -1131,14 +1021,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field time
 
       schema_path #{schema_path}
-      field_integer time
 
       buffer_type memory
     CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     io = StringIO.new("hello")
@@ -1209,7 +1095,6 @@ class BigQueryOutputTest < Test::Unit::TestCase
       time_field time
 
       schema_path #{schema_path}
-      field_integer time
 
       buffer_type memory
       <secondary>
@@ -1218,10 +1103,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
        utc
       </secondary>
     CONFIG
-    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
-      h[0][:type] = "INTEGER"
-      h[0][:mode] = "NULLABLE"
-    end
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
 
     chunk = Fluent::MemoryBufferChunk.new("my.tag")
     io = StringIO.new("hello")
test/plugin/test_record_schema.rb CHANGED
@@ -154,6 +154,23 @@ class RecordSchemaTest < Test::Unit::TestCase
     )
   end
 
+  def test_format_one_convert_array_or_hash_to_json
+    fields = Fluent::BigQuery::RecordSchema.new("record")
+    fields.load_schema(base_schema, false)
+
+    time = Time.local(2016, 2, 7, 19, 0, 0).utc
+
+    formatted = fields.format_one({
+      "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42]
+    })
+    assert_equal(
+      formatted,
+      {
+        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6N %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"]
+      }
+    )
+  end
+
   def test_format_one_with_extra_column
     fields = Fluent::BigQuery::RecordSchema.new("record")
     fields.load_schema(base_schema, false)
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.3.4
+  version: 0.4.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-12-19 00:00:00.000000000 Z
+date: 2017-01-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake