fluent-plugin-bigquery 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3b4f46435678df7e2b883e2dfdbb0a77a9359481
4
- data.tar.gz: 7cbe9b3aa6e4c22ccf0299bfd0a8ff8d1892ccff
3
+ metadata.gz: 7d4074dc903c423acbebd56b2b4d6fc0ce110510
4
+ data.tar.gz: 4d17cd1b2ee3768b83845105b5b9a714835e0a4c
5
5
  SHA512:
6
- metadata.gz: b3e02368662e2c7448726d9d3652aa80d60cd19a040122cfdd05aed6b36949e2c5b760cc3812f690633e4ad3685fd01da14eacab65066caa18e754b8432e7dde
7
- data.tar.gz: b9d5c81042fe958b6230d9ebffbf6c09526d7f7bdb0b2641667e2c7c34ab70418d03542a23bb90bff23e565315f7c1e6817ba792b1e622d94d015bad6eea4ef0
6
+ metadata.gz: 7f99c64e394650b7eac03e6872dcfafb36981f48a726d8aba9d87fc83b45329ebac925c7d1239113995597e4697d8afcf1f9397c8583d0a3bbe11d47aedd668b
7
+ data.tar.gz: 52554bcd622e75486fc8a10ceeebd8af958ac5523869f2ae964324c1348b734fcef00c4232766484a0d3112c15b50eb06f334b6efaf2cd55394321139bc1df9e
data/README.md CHANGED
@@ -21,47 +21,48 @@ If you use ruby-2.1 or earlier, you must use activesupport-4.2.x or earlier.
21
21
 
22
22
  ### Options
23
23
 
24
- | name | type | required? | default | description |
25
- | :------------------------------------- | :------------ | :----------- | :------------------------- | :----------------------- |
26
- | method | string | no | insert | `insert` (Streaming Insert) or `load` (load job) |
27
- | buffer_type | string | no | lightening (insert) or file (load) | |
28
- | buffer_chunk_limit | integer | no | 1MB (insert) or 1GB (load) | |
29
- | buffer_queue_limit | integer | no | 1024 (insert) or 32 (load) | |
30
- | buffer_chunk_records_limit | integer | no | 500 | |
31
- | flush_interval | float | no | 0.25 (*insert) or default of time sliced output (load) | |
32
- | try_flush_interval | float | no | 0.05 (*insert) or default of time sliced output (load) | |
33
- | auth_method | enum | yes | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
34
- | email | string | yes (private_key) | nil | GCP Service Account Email |
35
- | private_key_path | string | yes (private_key) | nil | GCP Private Key file path |
36
- | private_key_passphrase | string | yes (private_key) | nil | GCP Private Key Passphrase |
37
- | json_key | string | yes (json_key) | nil | GCP JSON Key file path or JSON Key string |
38
- | project | string | yes | nil | |
39
- | table | string | yes (either `tables`) | nil | |
40
- | tables | string | yes (either `table`) | nil | can set multi table names splitted by `,` |
41
- | template_suffix | string | no | nil | can use `%{time_slice}` placeholder replaced by `time_slice_format` |
42
- | auto_create_table | bool | no | false | If true, creates table automatically |
43
- | skip_invalid_rows | bool | no | false | Only `insert` method. |
44
- | max_bad_records | integer | no | 0 | Only `load` method. If the number of bad records exceeds this value, an invalid error is returned in the job result. |
45
- | ignore_unknown_values | bool | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
46
- | schema_path | string | yes (either `fetch_schema`) | nil | Schema Definition file path. It is formatted by JSON. |
47
- | fetch_schema | bool | yes (either `schema_path`) | false | If true, fetch table schema definition from Bigquery table automatically. |
48
- | fetch_schema_table | string | no | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
49
- | schema_cache_expire | integer | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
50
- | field_string | string | no | nil | see examples. |
51
- | field_integer | string | no | nil | see examples. |
52
- | field_float | string | no | nil | see examples. |
53
- | field_boolean | string | no | nil | see examples. |
54
- | field_timestamp | string | no | nil | see examples. |
55
- | time_field | string | no | nil | If this param is set, plugin set formatted time string to this field. |
56
- | time_format | string | no | nil | ex. `%s`, `%Y/%m%d %H:%M:%S` |
57
- | replace_record_key | bool | no | false | see examples. |
58
- | replace_record_key_regexp{1-10} | string | no | nil | see examples. |
59
- | convert_hash_to_json | bool | no | false | If true, converts Hash value of record to JSON String. |
60
- | insert_id_field | string | no | nil | Use key as `insert_id` of Streaming Insert API parameter. |
61
- | request_timeout_sec | integer | no | nil | Bigquery API response timeout |
62
- | request_open_timeout_sec | integer | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
63
- | time_partitioning_type | enum | no (either day) | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
64
- | time_partitioning_expiration | time | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
24
+ | name | type | required? | default | description |
25
+ | :------------------------------------- | :------------ | :----------- | :------------------------- | :----------------------- |
26
+ | method | string | no | insert | `insert` (Streaming Insert) or `load` (load job) |
27
+ | buffer_type | string | no | lightening (insert) or file (load) | |
28
+ | buffer_chunk_limit | integer | no | 1MB (insert) or 1GB (load) | |
29
+ | buffer_queue_limit | integer | no | 1024 (insert) or 32 (load) | |
30
+ | buffer_chunk_records_limit | integer | no | 500 | |
31
+ | flush_interval | float | no | 0.25 (*insert) or default of time sliced output (load) | |
32
+ | try_flush_interval | float | no | 0.05 (*insert) or default of time sliced output (load) | |
33
+ | auth_method | enum | yes | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
34
+ | email | string | yes (private_key) | nil | GCP Service Account Email |
35
+ | private_key_path | string | yes (private_key) | nil | GCP Private Key file path |
36
+ | private_key_passphrase | string | yes (private_key) | nil | GCP Private Key Passphrase |
37
+ | json_key | string | yes (json_key) | nil | GCP JSON Key file path or JSON Key string |
38
+ | project | string | yes | nil | |
39
+ | table | string | yes (either `tables`) | nil | |
40
+ | tables | string | yes (either `table`) | nil | can set multiple table names split by `,` |
41
+ | template_suffix | string | no | nil | can use `%{time_slice}` placeholder replaced by `time_slice_format` |
42
+ | auto_create_table | bool | no | false | If true, creates table automatically |
43
+ | skip_invalid_rows | bool | no | false | Only `insert` method. |
44
+ | max_bad_records | integer | no | 0 | Only `load` method. If the number of bad records exceeds this value, an invalid error is returned in the job result. |
45
+ | ignore_unknown_values | bool | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
46
+ | schema | array | yes (either `fetch_schema` or `schema_path`) | nil | Schema definition, formatted as JSON. See the examples below. |
47
+ | schema_path | string | yes (either `fetch_schema`) | nil | Schema definition file path. It is formatted as JSON. |
48
+ | fetch_schema | bool | yes (either `schema_path`) | false | If true, fetch table schema definition from Bigquery table automatically. |
49
+ | fetch_schema_table | string | no | nil | If set, fetch the table schema definition from this table. If `fetch_schema` is false, this param is ignored |
50
+ | schema_cache_expire | integer | no | 600 | Value is in seconds. If the current time is past the expiration interval, the table schema definition is re-fetched. |
51
+ | field_string (deprecated) | string | no | nil | see examples. |
52
+ | field_integer (deprecated) | string | no | nil | see examples. |
53
+ | field_float (deprecated) | string | no | nil | see examples. |
54
+ | field_boolean (deprecated) | string | no | nil | see examples. |
55
+ | field_timestamp (deprecated) | string | no | nil | see examples. |
56
+ | time_field | string | no | nil | If this param is set, the plugin sets a formatted time string to this field. |
57
+ | time_format | string | no | nil | ex. `%s`, `%Y/%m%d %H:%M:%S` |
58
+ | replace_record_key | bool | no | false | see examples. |
59
+ | replace_record_key_regexp{1-10} | string | no | nil | see examples. |
60
+ | convert_hash_to_json (deprecated) | bool | no | false | If true, converts Hash value of record to JSON String. |
61
+ | insert_id_field | string | no | nil | Use this field's value as the `insertId` property of the Streaming Insert API. |
62
+ | request_timeout_sec | integer | no | nil | Bigquery API response timeout |
63
+ | request_open_timeout_sec | integer | no | 60 | BigQuery API connection and request timeout. If you send big data to BigQuery, set a larger value. |
64
+ | time_partitioning_type | enum | no (either day) | nil | Type of BigQuery time partitioning feature (experimental feature on BigQuery). |
65
+ | time_partitioning_expiration | time | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
65
66
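For orientation, a minimal sketch of the new `schema` parameter in a match block (the project, dataset, table and key file names here are placeholders, not values from the gem):

```
<match dummy>
  type bigquery
  method insert

  auth_method json_key
  json_key /path/to/keyfile.json

  project yourproject_id
  dataset yourdataset_id
  table   access_log

  schema [
    {"name": "time", "type": "INTEGER"},
    {"name": "message", "type": "STRING"}
  ]
</match>
```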
 
66
67
  ### Standard Options
67
68
 
@@ -96,10 +97,25 @@ Configure insert specifications with target table schema, with your credentials.
96
97
  time_format %s
97
98
  time_field time
98
99
 
99
- field_integer time,status,bytes
100
- field_string rhost,vhost,path,method,protocol,agent,referer
101
- field_float requesttime
102
- field_boolean bot_access,loginsession
100
+ schema [
101
+ {"name": "time", "type": "INTEGER"},
102
+ {"name": "status", "type": "INTEGER"},
103
+ {"name": "bytes", "type": "INTEGER"},
104
+ {"name": "vhost", "type": "STRING"},
105
+ {"name": "path", "type": "STRING"},
106
+ {"name": "method", "type": "STRING"},
107
+ {"name": "protocol", "type": "STRING"},
108
+ {"name": "agent", "type": "STRING"},
109
+ {"name": "referer", "type": "STRING"},
110
+ {"name": "remote", "type": "RECORD", "fields": [
111
+ {"name": "host", "type": "STRING"},
112
+ {"name": "ip", "type": "STRING"},
113
+ {"name": "user", "type": "STRING"}
114
+ ]},
115
+ {"name": "requesttime", "type": "FLOAT"},
116
+ {"name": "bot_access", "type": "BOOLEAN"},
117
+ {"name": "loginsession", "type": "BOOLEAN"}
118
+ ]
103
119
  </match>
104
120
  ```
105
121
 
@@ -130,10 +146,25 @@ For high rate inserts over streaming inserts, you should specify flush intervals
130
146
  time_format %s
131
147
  time_field time
132
148
 
133
- field_integer time,status,bytes
134
- field_string rhost,vhost,path,method,protocol,agent,referer
135
- field_float requesttime
136
- field_boolean bot_access,loginsession
149
+ schema [
150
+ {"name": "time", "type": "INTEGER"},
151
+ {"name": "status", "type": "INTEGER"},
152
+ {"name": "bytes", "type": "INTEGER"},
153
+ {"name": "vhost", "type": "STRING"},
154
+ {"name": "path", "type": "STRING"},
155
+ {"name": "method", "type": "STRING"},
156
+ {"name": "protocol", "type": "STRING"},
157
+ {"name": "agent", "type": "STRING"},
158
+ {"name": "referer", "type": "STRING"},
159
+ {"name": "remote", "type": "RECORD", "fields": [
160
+ {"name": "host", "type": "STRING"},
161
+ {"name": "ip", "type": "STRING"},
162
+ {"name": "user", "type": "STRING"}
163
+ ]},
164
+ {"name": "requesttime", "type": "FLOAT"},
165
+ {"name": "bot_access", "type": "BOOLEAN"},
166
+ {"name": "loginsession", "type": "BOOLEAN"}
167
+ ]
137
168
  </match>
138
169
  ```
139
170
 
@@ -266,11 +297,7 @@ Compute Engine instance, then you can configure fluentd like this.
266
297
 
267
298
  time_format %s
268
299
  time_field time
269
-
270
- field_integer time,status,bytes
271
- field_string rhost,vhost,path,method,protocol,agent,referer
272
- field_float requesttime
273
- field_boolean bot_access,loginsession
300
+ ...
274
301
  </match>
275
302
  ```
276
303
 
@@ -419,10 +446,25 @@ you can also specify nested fields by prefixing their belonging record fields.
419
446
  time_format %s
420
447
  time_field time
421
448
 
422
- field_integer time,response.status,response.bytes
423
- field_string request.vhost,request.path,request.method,request.protocol,request.agent,request.referer,remote.host,remote.ip,remote.user
424
- field_float request.time
425
- field_boolean request.bot_access,request.loginsession
449
+ schema [
450
+ {"name": "time", "type": "INTEGER"},
451
+ {"name": "status", "type": "INTEGER"},
452
+ {"name": "bytes", "type": "INTEGER"},
453
+ {"name": "vhost", "type": "STRING"},
454
+ {"name": "path", "type": "STRING"},
455
+ {"name": "method", "type": "STRING"},
456
+ {"name": "protocol", "type": "STRING"},
457
+ {"name": "agent", "type": "STRING"},
458
+ {"name": "referer", "type": "STRING"},
459
+ {"name": "remote", "type": "RECORD", "fields": [
460
+ {"name": "host", "type": "STRING"},
461
+ {"name": "ip", "type": "STRING"},
462
+ {"name": "user", "type": "STRING"}
463
+ ]},
464
+ {"name": "requesttime", "type": "FLOAT"},
465
+ {"name": "bot_access", "type": "BOOLEAN"},
466
+ {"name": "loginsession", "type": "BOOLEAN"}
467
+ ]
426
468
  </match>
427
469
  ```
428
470
 
@@ -459,10 +501,9 @@ The second method is to specify a path to a BigQuery schema file instead of list
459
501
  time_field time
460
502
 
461
503
  schema_path /path/to/httpd.schema
462
- field_integer time
463
504
  </match>
464
505
  ```
465
- where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery.
506
+ where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery. By using an external schema file you are able to write a full schema that supports NULLABLE/REQUIRED/REPEATED; this feature is really useful and adds full flexibility.
466
507
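As a rough sketch of such a file (the field names and modes below are illustrative, not taken from this gem), a BigQuery schema file lists `name`, `type`, and optionally `mode` and nested `fields`:

```
[
  {"name": "time", "type": "INTEGER", "mode": "REQUIRED"},
  {"name": "vhost", "type": "STRING", "mode": "NULLABLE"},
  {"name": "tags", "type": "STRING", "mode": "REPEATED"},
  {"name": "remote", "type": "RECORD", "mode": "NULLABLE", "fields": [
    {"name": "host", "type": "STRING"},
    {"name": "ip", "type": "STRING"}
  ]}
]
```

When `mode` is omitted, BigQuery treats the field as NULLABLE.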
 
467
508
  The third method is to set `fetch_schema` to `true` to enable fetch a schema using BigQuery API. In this case, your fluent.conf looks like:
468
509
 
@@ -477,7 +518,6 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
477
518
 
478
519
  fetch_schema true
479
520
  # fetch_schema_table other_table # if you want to fetch schema from other table
480
- field_integer time
481
521
  </match>
482
522
  ```
483
523
 
@@ -498,17 +538,14 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p
498
538
  ...
499
539
 
500
540
  insert_id_field uuid
501
- field_string uuid
541
+ schema [{"name": "uuid", "type": "STRING"}]
502
542
  </match>
503
543
  ```
504
544
 
505
545
  ## TODO
506
546
 
507
- * support optional data fields
508
- * support NULLABLE/REQUIRED/REPEATED field options in field list style of configuration
509
547
  * OAuth installed application credentials support
510
548
  * Google API discovery expiration
511
- * Error classes
512
549
  * check row size limits
513
550
 
514
551
  ## Authors
@@ -1,3 +1,5 @@
1
+ require 'multi_json'
2
+
1
3
  module Fluent
2
4
  module BigQuery
3
5
  class FieldSchema
@@ -56,7 +58,11 @@ module Fluent
56
58
  end
57
59
 
58
60
  def format_one(value)
59
- value.to_s
61
+ if value.is_a?(Hash) || value.is_a?(Array)
62
+ MultiJson.dump(value)
63
+ else
64
+ value.to_s
65
+ end
60
66
  end
61
67
  end
62
68
 
@@ -116,6 +122,48 @@ module Fluent
116
122
  end
117
123
  end
118
124
 
125
+ class DateFieldSchema < FieldSchema
126
+ def type
127
+ :date
128
+ end
129
+
130
+ def format_one(value)
131
+ if value.respond_to?(:strftime)
132
+ value.strftime("%Y-%m-%d")
133
+ else
134
+ value
135
+ end
136
+ end
137
+ end
138
+
139
+ class DateTimeFieldSchema < FieldSchema
140
+ def type
141
+ :datetime
142
+ end
143
+
144
+ def format_one(value)
145
+ if value.respond_to?(:strftime)
146
+ value.strftime("%Y-%m-%dT%H:%M:%S.%6L")
147
+ else
148
+ value
149
+ end
150
+ end
151
+ end
152
+
153
+ class TimeFieldSchema < FieldSchema
154
+ def type
155
+ :time
156
+ end
157
+
158
+ def format_one(value)
159
+ if value.respond_to?(:strftime)
160
+ value.strftime("%H:%M:%S.%6L")
161
+ else
162
+ value
163
+ end
164
+ end
165
+ end
166
+
119
167
  class RecordSchema < FieldSchema
120
168
  FIELD_TYPES = {
121
169
  string: StringFieldSchema,
@@ -123,6 +171,9 @@ module Fluent
123
171
  float: FloatFieldSchema,
124
172
  boolean: BooleanFieldSchema,
125
173
  timestamp: TimestampFieldSchema,
174
+ date: DateFieldSchema,
175
+ datetime: DateTimeFieldSchema,
176
+ time: TimeFieldSchema,
126
177
  record: RecordSchema
127
178
  }.freeze
128
179
 
@@ -1,5 +1,5 @@
1
1
  module Fluent
2
2
  module BigQueryPlugin
3
- VERSION = "0.3.4".freeze
3
+ VERSION = "0.4.0".freeze
4
4
  end
5
5
  end
@@ -6,6 +6,7 @@ module Fluent
6
6
  @scope = "https://www.googleapis.com/auth/bigquery"
7
7
  @auth_options = auth_options
8
8
  @log = log
9
+ @num_errors_per_chunk = {}
9
10
 
10
11
  @cached_client_expiration = Time.now + 1800
11
12
  end
@@ -104,7 +105,7 @@ module Fluent
104
105
  raise Fluent::BigQuery::Error.wrap(e)
105
106
  end
106
107
 
107
- def create_load_job(project, dataset, table_id, upload_source, job_id, fields, ignore_unknown_values: false, max_bad_records: 0, timeout_sec: nil, open_timeout_sec: 60, auto_create_table: nil, time_partitioning_type: nil, time_partitioning_expiration: nil)
108
+ def create_load_job(chunk_id, project, dataset, table_id, upload_source, fields, prevent_duplicate_load: false, ignore_unknown_values: false, max_bad_records: 0, timeout_sec: nil, open_timeout_sec: 60, auto_create_table: nil, time_partitioning_type: nil, time_partitioning_expiration: nil)
108
109
  configuration = {
109
110
  configuration: {
110
111
  load: {
@@ -123,6 +124,8 @@ module Fluent
123
124
  }
124
125
  }
125
126
  }
127
+
128
+ job_id = create_job_id(chunk_id, dataset, table_id, fields.to_a, max_bad_records, ignore_unknown_values) if prevent_duplicate_load
126
129
  configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if time_partitioning_type
127
130
  configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
128
131
 
@@ -148,7 +151,8 @@ module Fluent
148
151
  }
149
152
  }
150
153
  )
151
- wait_load_job(project, dataset, res.job_reference.job_id, table_id)
154
+ wait_load_job(chunk_id, project, dataset, res.job_reference.job_id, table_id)
155
+ @num_errors_per_chunk.delete(chunk_id)
152
156
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
153
157
  @client = nil
154
158
 
@@ -161,12 +165,16 @@ module Fluent
161
165
  raise "table created. send rows next time."
162
166
  end
163
167
 
164
- return wait_load_job(project, dataset, job_id, table_id) if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
168
+ if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
169
+ wait_load_job(chunk_id, project, dataset, job_id, table_id)
170
+ @num_errors_per_chunk.delete(chunk_id)
171
+ return
172
+ end
165
173
 
166
174
  raise Fluent::BigQuery::Error.wrap(e)
167
175
  end
168
176
 
169
- def wait_load_job(project, dataset, job_id, table_id, retryable: true)
177
+ def wait_load_job(chunk_id, project, dataset, job_id, table_id)
170
178
  wait_interval = 10
171
179
  _response = client.get_job(project, job_id)
172
180
 
@@ -186,9 +194,11 @@ module Fluent
186
194
  error_result = _response.status.error_result
187
195
  if error_result
188
196
  log.error "job.insert API (result)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: error_result.message, reason: error_result.reason
189
- if retryable && Fluent::BigQuery::Error.retryable_error_reason?(error_result.reason)
197
+ if Fluent::BigQuery::Error.retryable_error_reason?(error_result.reason)
198
+ @num_errors_per_chunk[chunk_id] = @num_errors_per_chunk[chunk_id].to_i + 1
190
199
  raise Fluent::BigQuery::RetryableError.new("failed to load into bigquery, retry")
191
200
  else
201
+ @num_errors_per_chunk.delete(chunk_id)
192
202
  raise Fluent::BigQuery::UnRetryableError.new("failed to load into bigquery, and cannot retry")
193
203
  end
194
204
  end
@@ -259,6 +269,12 @@ module Fluent
259
269
  def safe_table_id(table_id)
260
270
  table_id.gsub(/\$\d+$/, "")
261
271
  end
272
+
273
+ def create_job_id(chunk_id, dataset, table, schema, max_bad_records, ignore_unknown_values)
274
+ job_id_key = "#{chunk_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}#{@num_errors_per_chunk[chunk_id]}"
275
+ @log.debug "job_id_key: #{job_id_key}"
276
+ "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
277
+ end
262
278
  end
263
279
  end
264
280
  end
@@ -87,6 +87,7 @@ module Fluent
87
87
  # Default is false, which treats unknown values as errors.
88
88
  config_param :ignore_unknown_values, :bool, default: false
89
89
 
90
+ config_param :schema, :array, default: nil
90
91
  config_param :schema_path, :string, default: nil
91
92
  config_param :fetch_schema, :bool, default: false
92
93
  config_param :fetch_schema_table, :string, default: nil
@@ -213,7 +214,11 @@ module Fluent
213
214
 
214
215
  @tablelist = @tables ? @tables.split(',') : [@table]
215
216
 
217
+ legacy_schema_config_deprecation
216
218
  @fields = Fluent::BigQuery::RecordSchema.new('record')
219
+ if @schema
220
+ @fields.load_schema(@schema)
221
+ end
217
222
  if @schema_path
218
223
  @fields.load_schema(MultiJson.load(File.read(@schema_path)))
219
224
  end
@@ -259,6 +264,8 @@ module Fluent
259
264
  else
260
265
  @get_insert_id = nil
261
266
  end
267
+
268
+ warn "[DEPRECATION] `convert_hash_to_json` param is deprecated. If Hash value is inserted string field, plugin convert it to json automatically." if @convert_hash_to_json
262
269
  end
263
270
 
264
271
  def start
@@ -329,6 +336,12 @@ module Fluent
329
336
  record
330
337
  end
331
338
 
339
+ def legacy_schema_config_deprecation
340
+ if [@field_string, @field_integer, @field_float, @field_boolean, @field_timestamp].any?
341
+ warn "[DEPRECATION] `field_*` style schema config is deprecated. Instead of it, use `schema` config params that is array of json style."
342
+ end
343
+ end
344
+
332
345
  def write(chunk)
333
346
  table_id_format = @tables_mutex.synchronize do
334
347
  t = @tables_queue.shift
@@ -455,14 +468,9 @@ module Fluent
455
468
  def load(chunk, table_id)
456
469
  res = nil
457
470
 
458
- if @prevent_duplicate_load
459
- job_id = create_job_id(chunk, @dataset, table_id, @fields.to_a, @max_bad_records, @ignore_unknown_values)
460
- else
461
- job_id = nil
462
- end
463
-
464
471
  create_upload_source(chunk) do |upload_source|
465
- res = writer.create_load_job(@project, @dataset, table_id, upload_source, job_id, @fields, {
472
+ res = writer.create_load_job(chunk.unique_id, @project, @dataset, table_id, upload_source, @fields, {
473
+ prevent_duplicate_load: @prevent_duplicate_load,
466
474
  ignore_unknown_values: @ignore_unknown_values, max_bad_records: @max_bad_records,
467
475
  timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec, auto_create_table: @auto_create_table,
468
476
  time_partitioning_type: @time_partitioning_type, time_partitioning_expiration: @time_partitioning_expiration
@@ -494,10 +502,6 @@ module Fluent
494
502
  end
495
503
  end
496
504
  end
497
-
498
- def create_job_id(chunk, dataset, table, schema, max_bad_records, ignore_unknown_values)
499
- "fluentd_job_" + Digest::SHA1.hexdigest("#{chunk.unique_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}")
500
- end
501
505
  end
502
506
  end
503
507
  end
@@ -22,16 +22,31 @@ class BigQueryOutputTest < Test::Unit::TestCase
22
22
  time_format %s
23
23
  time_field time
24
24
 
25
- field_integer time,status,bytes
26
- field_string vhost,path,method,protocol,agent,referer,remote.host,remote.ip,remote.user
27
- field_float requesttime
28
- field_boolean bot_access,loginsession
25
+ schema [
26
+ {"name": "time", "type": "INTEGER"},
27
+ {"name": "status", "type": "INTEGER"},
28
+ {"name": "bytes", "type": "INTEGER"},
29
+ {"name": "vhost", "type": "STRING"},
30
+ {"name": "path", "type": "STRING"},
31
+ {"name": "method", "type": "STRING"},
32
+ {"name": "protocol", "type": "STRING"},
33
+ {"name": "agent", "type": "STRING"},
34
+ {"name": "referer", "type": "STRING"},
35
+ {"name": "remote", "type": "RECORD", "fields": [
36
+ {"name": "host", "type": "STRING"},
37
+ {"name": "ip", "type": "STRING"},
38
+ {"name": "user", "type": "STRING"}
39
+ ]},
40
+ {"name": "requesttime", "type": "FLOAT"},
41
+ {"name": "bot_access", "type": "BOOLEAN"},
42
+ {"name": "loginsession", "type": "BOOLEAN"}
43
+ ]
29
44
  ]
30
45
 
31
46
  API_SCOPE = "https://www.googleapis.com/auth/bigquery"
32
47
 
33
48
  def create_driver(conf = CONFIG)
34
- Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf)
49
+ Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf, true)
35
50
  end
36
51
 
37
52
  def stub_writer(driver)
@@ -91,7 +106,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
91
106
  auth_method compute_engine
92
107
  project yourproject_id
93
108
  dataset yourdataset_id
94
- field_integer time,status,bytes
109
+ schema [
110
+ {"name": "time", "type": "INTEGER"},
111
+ {"name": "status", "type": "INTEGER"},
112
+ {"name": "bytes", "type": "INTEGER"}
113
+ ]
95
114
  ])
96
115
  mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
97
116
  driver.instance.writer
@@ -114,7 +133,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
114
133
  json_key #{json_key_path}
115
134
  project yourproject_id
116
135
  dataset yourdataset_id
117
- field_integer time,status,bytes
136
+ schema [
137
+ {"name": "time", "type": "INTEGER"},
138
+ {"name": "status", "type": "INTEGER"},
139
+ {"name": "bytes", "type": "INTEGER"}
140
+ ]
118
141
  ])
119
142
  mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
120
143
  driver.instance.writer
@@ -134,7 +157,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
134
157
  json_key #{json_key_path}
135
158
  project yourproject_id
136
159
  dataset yourdataset_id
137
- field_integer time,status,bytes
160
+ schema [
161
+ {"name": "time", "type": "INTEGER"},
162
+ {"name": "status", "type": "INTEGER"},
163
+ {"name": "bytes", "type": "INTEGER"}
164
+ ]
138
165
  ])
139
166
  assert_raises(Errno::EACCES) do
140
167
  driver.instance.writer.client
@@ -147,9 +174,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
147
174
  def test_configure_auth_json_key_as_string
148
175
  json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
149
176
  json_key_io = StringIO.new(json_key)
150
- mock(StringIO).new(json_key) { json_key_io }
151
177
  authorization = Object.new
152
- mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: json_key_io, scope: API_SCOPE) { authorization }
178
+ mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == JSON.parse(json_key_io.read) }, scope: API_SCOPE) { authorization }
153
179
 
154
180
  mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
155
181
  mock(cl).__send__(:authorization=, authorization) {}
@@ -162,7 +188,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
162
188
  json_key #{json_key}
163
189
  project yourproject_id
164
190
  dataset yourdataset_id
165
- field_integer time,status,bytes
191
+ schema [
192
+ {"name": "time", "type": "INTEGER"},
193
+ {"name": "status", "type": "INTEGER"},
194
+ {"name": "bytes", "type": "INTEGER"}
195
+ ]
166
196
  ])
167
197
  mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
168
198
  driver.instance.writer
@@ -183,7 +213,11 @@ class BigQueryOutputTest < Test::Unit::TestCase
183
213
  auth_method application_default
184
214
  project yourproject_id
185
215
  dataset yourdataset_id
186
- field_integer time,status,bytes
216
+ schema [
217
+ {"name": "time", "type": "INTEGER"},
218
+ {"name": "status", "type": "INTEGER"},
219
+ {"name": "bytes", "type": "INTEGER"}
220
+ ]
187
221
  ])
188
222
 
189
223
  mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
@@ -191,186 +225,6 @@ class BigQueryOutputTest < Test::Unit::TestCase
191
225
  assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
192
226
  end
193
227
 
194
- def test_configure_fieldname_stripped
195
- driver = create_driver(%[
196
- table foo
197
- email foo@bar.example
198
- private_key_path /path/to/key
199
- project yourproject_id
200
- dataset yourdataset_id
201
-
202
- time_format %s
203
- time_field time
204
-
205
- field_integer time , status , bytes
206
- field_string _log_name, vhost, path, method, protocol, agent, referer, remote.host, remote.ip, remote.user
207
- field_float requesttime
208
- field_boolean bot_access , loginsession
209
- ])
210
- fields = driver.instance.instance_eval{ @fields }
211
-
212
- assert (not fields['time ']), "tailing spaces must be stripped"
213
- assert fields['time']
214
- assert fields['status']
215
- assert fields['bytes']
216
- assert fields['_log_name']
217
- assert fields['vhost']
218
- assert fields['protocol']
219
- assert fields['agent']
220
- assert fields['referer']
221
- assert fields['remote']['host']
222
- assert fields['remote']['ip']
223
- assert fields['remote']['user']
224
- assert fields['requesttime']
225
- assert fields['bot_access']
226
- assert fields['loginsession']
227
- end
228
-
229
- def test_configure_invalid_fieldname
230
- base = %[
231
- table foo
232
- email foo@bar.example
233
- private_key_path /path/to/key
234
- project yourproject_id
235
- dataset yourdataset_id
236
-
237
- time_format %s
238
- time_field time
239
- ]
240
-
241
- assert_raises(Fluent::ConfigError) do
242
- create_driver(base + "field_integer time field\n")
243
- end
244
- assert_raises(Fluent::ConfigError) do
245
- create_driver(base + "field_string my name\n")
246
- end
247
- assert_raises(Fluent::ConfigError) do
248
- create_driver(base + "field_string remote.host name\n")
249
- end
250
- assert_raises(Fluent::ConfigError) do
251
- create_driver(base + "field_string 1column\n")
252
- end
253
- assert_raises(Fluent::ConfigError) do
254
- create_driver(base + "field_string #{'tenstrings' * 12 + '123456789'}\n")
255
- end
256
- assert_raises(Fluent::ConfigError) do
257
- create_driver(base + "field_float request time\n")
258
- end
259
- assert_raises(Fluent::ConfigError) do
260
- create_driver(base + "field_boolean login session\n")
261
- end
262
- end
263
-
264
- def test_format_stream
265
- now = Time.now
266
- input = [
267
- now,
268
- {
269
- "status" => "1",
270
- "bytes" => 3.0,
271
- "vhost" => :bar,
272
- "path" => "/path/to/baz",
273
- "method" => "GET",
274
- "protocol" => "HTTP/0.9",
275
- "agent" => "libwww",
276
- "referer" => "http://referer.example",
277
- "requesttime" => (now - 1).to_f.to_s,
278
- "bot_access" => true,
279
- "loginsession" => false,
280
- "something-else" => "would be ignored",
281
- "yet-another" => {
282
- "foo" => "bar",
283
- "baz" => 1,
284
- },
285
- "remote" => {
286
- "host" => "remote.example",
287
- "ip" => "192.0.2.1",
288
- "port" => 12345,
289
- "user" => "tagomoris",
290
- }
291
- }
292
- ]
293
- expected = {
294
- "json" => {
295
- "time" => now.to_i,
296
- "status" => 1,
297
- "bytes" => 3,
298
- "vhost" => "bar",
299
- "path" => "/path/to/baz",
300
- "method" => "GET",
301
- "protocol" => "HTTP/0.9",
302
- "agent" => "libwww",
303
- "referer" => "http://referer.example",
304
- "requesttime" => (now - 1).to_f.to_s.to_f,
305
- "bot_access" => true,
306
- "loginsession" => false,
307
- "something-else" => "would be ignored",
308
- "yet-another" => {
309
- "foo" => "bar",
310
- "baz" => 1,
311
- },
312
- "remote" => {
313
- "host" => "remote.example",
314
- "ip" => "192.0.2.1",
315
- "port" => 12345,
316
- "user" => "tagomoris",
317
- }
318
- }
319
- }
320
-
321
- driver = create_driver(CONFIG)
322
- driver.instance.start
323
- buf = driver.instance.format_stream("my.tag", [input])
324
- driver.instance.shutdown
325
-
326
- assert_equal expected, MessagePack.unpack(buf)
327
- end
328
-
329
- [
330
- # <time_format>, <time field type>, <time expectation generator>, <assertion>
331
- [
332
- "%s.%6N", "field_float",
333
- lambda{|t| t.strftime("%s.%6N").to_f },
334
- lambda{|recv, expected, actual|
335
- recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
336
- }
337
- ],
338
- [
339
- "%Y-%m-%dT%H:%M:%SZ", "field_string",
340
- lambda{|t| t.iso8601 },
341
- :assert_equal.to_proc
342
- ],
343
- [
344
- "%a, %d %b %Y %H:%M:%S GMT", "field_string",
345
- lambda{|t| t.httpdate },
346
- :assert_equal.to_proc
347
- ],
348
- ].each do |format, type, expect_time, assert|
349
- define_method("test_time_formats_#{format}") do
350
- now = Time.now.utc
351
- input = [ now, {} ]
352
- expected = { "json" => { "time" => expect_time[now], } }
353
-
354
- driver = create_driver(<<-CONFIG)
355
- table foo
356
- email foo@bar.example
357
- private_key_path /path/to/key
358
- project yourproject_id
359
- dataset yourdataset_id
360
-
361
- time_format #{format}
362
- time_field time
363
- #{type} time
364
- CONFIG
365
-
366
- driver.instance.start
367
- buf = driver.instance.format_stream("my.tag", [input])
368
- driver.instance.shutdown
369
-
370
- assert[self, expected["json"]["time"], MessagePack.unpack(buf)["json"]["time"]]
371
- end
372
- end
373
-
374
228
  def test_format_nested_time
375
229
  now = Time.now
376
230
  input = [
@@ -402,8 +256,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
402
256
  time_format %s
403
257
  time_field metadata.time
404
258
 
405
- field_integer metadata.time
406
- field_string metadata.node,log
259
+ schema [
260
+ {"name": "metadata", "type": "RECORD", "fields": [
261
+ {"name": "time", "type": "INTEGER"},
262
+ {"name": "node", "type": "STRING"}
263
+ ]},
264
+ {"name": "log", "type": "STRING"}
265
+ ]
407
266
  CONFIG
408
267
 
409
268
  driver.instance.start
@@ -489,7 +348,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
489
348
  time_field time
490
349
 
491
350
  schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
492
- field_integer time
351
+ schema [{"name": "time", "type": "INTEGER"}]
493
352
  CONFIG
494
353
  driver.instance.start
495
354
  buf = driver.instance.format_stream("my.tag", [input])
@@ -529,7 +388,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
529
388
  time_field time
530
389
 
531
390
  schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
532
- field_integer time
391
+ schema [{"name": "time", "type": "INTEGER"}]
533
392
  CONFIG
534
393
  driver.instance.start
535
394
  buf = driver.instance.format_stream("my.tag", [input])
@@ -569,7 +428,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
569
428
  time_field time
570
429
 
571
430
  fetch_schema true
572
- field_integer time
431
+ schema [{"name": "time", "type": "INTEGER"}]
573
432
  CONFIG
574
433
 
575
434
  writer = stub_writer(driver)
@@ -635,7 +494,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
635
494
  time_field time
636
495
 
637
496
  fetch_schema true
638
- field_integer time
497
+ schema [{"name": "time", "type": "INTEGER"}]
639
498
  CONFIG
640
499
 
641
500
  writer = stub_writer(driver)
@@ -693,7 +552,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
693
552
  dataset yourdataset_id
694
553
 
695
554
  insert_id_field uuid
696
- field_string uuid
555
+ schema [{"name": "uuid", "type": "STRING"}]
697
556
  CONFIG
698
557
  driver.instance.start
699
558
  buf = driver.instance.format_stream("my.tag", [input])
@@ -729,7 +588,9 @@ class BigQueryOutputTest < Test::Unit::TestCase
729
588
  dataset yourdataset_id
730
589
 
731
590
  insert_id_field data.uuid
732
- field_string data.uuid
591
+ schema [{"name": "data", "type": "RECORD", "fields": [
592
+ {"name": "uuid", "type": "STRING"}
593
+ ]}]
733
594
  CONFIG
734
595
  driver.instance.start
735
596
  buf = driver.instance.format_stream("my.tag", [input])
@@ -758,7 +619,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
758
619
  project yourproject_id
759
620
  dataset yourdataset_id
760
621
 
761
- field_string uuid
622
+ schema [{"name": "uuid", "type": "STRING"}]
762
623
 
763
624
  buffer_type memory
764
625
  CONFIG
@@ -803,9 +664,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
803
664
  time_format %s
804
665
  time_field time
805
666
 
806
- field_integer time
807
- field_string vhost, referer
808
- field_boolean bot_access, login_session
667
+ schema [
668
+ {"name": "time", "type": "INTEGER"},
669
+ {"name": "vhost", "type": "STRING"},
670
+ {"name": "refere", "type": "STRING"},
671
+ {"name": "bot_access", "type": "BOOLEAN"},
672
+ {"name": "login_session", "type": "BOOLEAN"}
673
+ ]
809
674
  CONFIG
810
675
  driver.instance.start
811
676
  buf = driver.instance.format_stream("my.tag", [input])
@@ -854,9 +719,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
854
719
  time_format %s
855
720
  time_field time
856
721
 
857
- field_integer time
858
- field_string vhost, referer, remote
859
- field_boolean bot_access, loginsession
722
+ schema [
723
+ {"name": "time", "type": "INTEGER"},
724
+ {"name": "vhost", "type": "STRING"},
725
+ {"name": "refere", "type": "STRING"},
726
+ {"name": "bot_access", "type": "BOOLEAN"},
727
+ {"name": "loginsession", "type": "BOOLEAN"}
728
+ ]
860
729
  CONFIG
861
730
  driver.instance.start
862
731
  buf = driver.instance.format_stream("my.tag", [input])
@@ -906,10 +775,25 @@ class BigQueryOutputTest < Test::Unit::TestCase
906
775
  time_format %s
907
776
  time_field time
908
777
 
909
- field_integer time,status,bytes
910
- field_string vhost,path,method,protocol,agent,referer,remote.host,remote.ip,remote.user
911
- field_float requesttime
912
- field_boolean bot_access,loginsession
778
+ schema [
779
+ {"name": "time", "type": "INTEGER"},
780
+ {"name": "status", "type": "INTEGER"},
781
+ {"name": "bytes", "type": "INTEGER"},
782
+ {"name": "vhost", "type": "STRING"},
783
+ {"name": "path", "type": "STRING"},
784
+ {"name": "method", "type": "STRING"},
785
+ {"name": "protocol", "type": "STRING"},
786
+ {"name": "agent", "type": "STRING"},
787
+ {"name": "referer", "type": "STRING"},
788
+ {"name": "remote", "type": "RECORD", "fields": [
789
+ {"name": "host", "type": "STRING"},
790
+ {"name": "ip", "type": "STRING"},
791
+ {"name": "user", "type": "STRING"}
792
+ ]},
793
+ {"name": "requesttime", "type": "FLOAT"},
794
+ {"name": "bot_access", "type": "BOOLEAN"},
795
+ {"name": "loginsession", "type": "BOOLEAN"}
796
+ ]
913
797
  <secondary>
914
798
  type file
915
799
  path error
@@ -951,10 +835,25 @@ class BigQueryOutputTest < Test::Unit::TestCase
951
835
  time_format %s
952
836
  time_field time
953
837
 
954
- field_integer time,status,bytes
955
- field_string vhost,path,method,protocol,agent,referer,remote.host,remote.ip,remote.user
956
- field_float requesttime
957
- field_boolean bot_access,loginsession
838
+ schema [
839
+ {"name": "time", "type": "INTEGER"},
840
+ {"name": "status", "type": "INTEGER"},
841
+ {"name": "bytes", "type": "INTEGER"},
842
+ {"name": "vhost", "type": "STRING"},
843
+ {"name": "path", "type": "STRING"},
844
+ {"name": "method", "type": "STRING"},
845
+ {"name": "protocol", "type": "STRING"},
846
+ {"name": "agent", "type": "STRING"},
847
+ {"name": "referer", "type": "STRING"},
848
+ {"name": "remote", "type": "RECORD", "fields": [
849
+ {"name": "host", "type": "STRING"},
850
+ {"name": "ip", "type": "STRING"},
851
+ {"name": "user", "type": "STRING"}
852
+ ]},
853
+ {"name": "requesttime", "type": "FLOAT"},
854
+ {"name": "bot_access", "type": "BOOLEAN"},
855
+ {"name": "loginsession", "type": "BOOLEAN"}
856
+ ]
958
857
  <secondary>
959
858
  type file
960
859
  path error
@@ -1002,20 +901,16 @@ class BigQueryOutputTest < Test::Unit::TestCase
1002
901
  time_field time
1003
902
 
1004
903
  schema_path #{schema_path}
1005
- field_integer time
1006
904
 
1007
905
  buffer_type memory
1008
906
  CONFIG
1009
- schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
1010
- h[0][:type] = "INTEGER"
1011
- h[0][:mode] = "NULLABLE"
1012
- end
907
+ schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
1013
908
 
1014
909
  writer = stub_writer(driver)
1015
910
  chunk = Fluent::MemoryBufferChunk.new("my.tag")
1016
911
  io = StringIO.new("hello")
1017
912
  mock(driver.instance).create_upload_source(chunk).yields(io)
1018
- mock(writer).wait_load_job("yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
913
+ mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
1019
914
  mock(writer.client).insert_job('yourproject_id', {
1020
915
  configuration: {
1021
916
  load: {
@@ -1065,22 +960,17 @@ class BigQueryOutputTest < Test::Unit::TestCase
1065
960
  time_field time
1066
961
 
1067
962
  schema_path #{schema_path}
1068
- field_integer time
1069
963
  prevent_duplicate_load true
1070
964
 
1071
965
  buffer_type memory
1072
966
  CONFIG
1073
- schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
1074
- h[0][:type] = "INTEGER"
1075
- h[0][:mode] = "NULLABLE"
1076
- end
967
+ schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
1077
968
 
1078
969
  chunk = Fluent::MemoryBufferChunk.new("my.tag")
1079
970
  io = StringIO.new("hello")
1080
971
  mock(driver.instance).create_upload_source(chunk).yields(io)
1081
- mock.proxy(driver.instance).create_job_id(duck_type(:unique_id), "yourdataset_id", "foo", driver.instance.instance_variable_get(:@fields).to_a, 0, false)
1082
972
  writer = stub_writer(driver)
1083
- mock(writer).wait_load_job("yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
973
+ mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
1084
974
  mock(writer.client).insert_job('yourproject_id', {
1085
975
  configuration: {
1086
976
  load: {
@@ -1131,14 +1021,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
1131
1021
  time_field time
1132
1022
 
1133
1023
  schema_path #{schema_path}
1134
- field_integer time
1135
1024
 
1136
1025
  buffer_type memory
1137
1026
  CONFIG
1138
- schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
1139
- h[0][:type] = "INTEGER"
1140
- h[0][:mode] = "NULLABLE"
1141
- end
1027
+ schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
1142
1028
 
1143
1029
  chunk = Fluent::MemoryBufferChunk.new("my.tag")
1144
1030
  io = StringIO.new("hello")
@@ -1209,7 +1095,6 @@ class BigQueryOutputTest < Test::Unit::TestCase
1209
1095
  time_field time
1210
1096
 
1211
1097
  schema_path #{schema_path}
1212
- field_integer time
1213
1098
 
1214
1099
  buffer_type memory
1215
1100
  <secondary>
@@ -1218,10 +1103,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
1218
1103
  utc
1219
1104
  </secondary>
1220
1105
  CONFIG
1221
- schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
1222
- h[0][:type] = "INTEGER"
1223
- h[0][:mode] = "NULLABLE"
1224
- end
1106
+ schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys)
1225
1107
 
1226
1108
  chunk = Fluent::MemoryBufferChunk.new("my.tag")
1227
1109
  io = StringIO.new("hello")
@@ -154,6 +154,23 @@ class RecordSchemaTest < Test::Unit::TestCase
154
154
  )
155
155
  end
156
156
 
157
+ def test_format_one_convert_array_or_hash_to_json
158
+ fields = Fluent::BigQuery::RecordSchema.new("record")
159
+ fields.load_schema(base_schema, false)
160
+
161
+ time = Time.local(2016, 2, 7, 19, 0, 0).utc
162
+
163
+ formatted = fields.format_one({
164
+ "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42]
165
+ })
166
+ assert_equal(
167
+ formatted,
168
+ {
169
+ "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"]
170
+ }
171
+ )
172
+ end
173
+
157
174
  def test_format_one_with_extra_column
158
175
  fields = Fluent::BigQuery::RecordSchema.new("record")
159
176
  fields.load_schema(base_schema, false)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-12-19 00:00:00.000000000 Z
12
+ date: 2017-01-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake