fluent-plugin-bigquery 0.1.0 → 0.2.0
- checksums.yaml +4 -4
- data/README.md +31 -7
- data/fluent-plugin-bigquery.gemspec +3 -3
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery.rb +42 -3
- data/test/plugin/test_out_bigquery.rb +107 -0
- metadata +5 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc7d16f7400a52c0cbf655c22b6c919572142182
+  data.tar.gz: 7c83037e999617f73a0fc6a51d41167a979b72a9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a25328702bfad9ddff282cbcd9c8d84d64e2857158c1bc7a20ce388610bb5150e969ac0f07ebd1162b01ffca29b50cf920a28c3806715512cad4a9ff6cbe4d5c
+  data.tar.gz: 9701a7f10f17e0f7dbaf61bca99d6f73e1b769deba6361935cc413c1bbc5039349c626a84309f57919e884228cc3084eb38d1fdad83fde99505a68509d171145
data/README.md
CHANGED
@@ -165,10 +165,11 @@ because there is a time lag between collection and transmission of logs.
 
 ### Table schema
 
-There are
+There are three methods to describe the schema of the target table.
 
 1. List fields in fluent.conf
 2. Load a schema file in JSON.
+3. Fetch a schema using BigQuery API
 
 The examples above use the first method. In this method,
 you can also specify nested fields by prefixing their belonging record fields.
@@ -176,12 +177,12 @@ you can also specify nested fields by prefixing their belonging record fields.
 
 ```apache
 <match dummy>
   type bigquery
-
+
   ...
 
   time_format %s
   time_field  time
-
+
   field_integer time,response.status,response.bytes
   field_string  request.vhost,request.path,request.method,request.protocol,request.agent,request.referer,remote.host,remote.ip,remote.user
   field_float   request.time
@@ -215,20 +216,38 @@ The second method is to specify a path to a BigQuery schema file instead of list
 
 ```apache
 <match dummy>
   type bigquery
-
+
   ...
-
+
   time_format %s
   time_field  time
-
+
   schema_path /path/to/httpd.schema
   field_integer time
 </match>
 ```
 where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery.
 
+The third method is to set `fetch_schema` to `true` so that the plugin fetches the schema using the BigQuery API. In this case, your fluent.conf looks like:
+
+```apache
+<match dummy>
+  type bigquery
+
+  ...
+
+  time_format %s
+  time_field  time
+
+  fetch_schema true
+  field_integer time
+</match>
+```
+
+If you specify multiple tables in the configuration file, the plugin fetches the schema data for all of them from BigQuery and merges it.
+
 NOTE: Since JSON does not define how to encode data of TIMESTAMP type,
-you are still recommended to specify JSON types for TIMESTAMP fields as "time" field does in the example.
+you are still recommended to specify JSON types for TIMESTAMP fields as the "time" field does in the example, if you use the second or third method.
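For the multi-table case mentioned in the added text, a minimal configuration sketch (table names here are illustrative; `tables` is the plugin's existing comma-separated option, visible in the out_bigquery.rb hunk further down):

```apache
<match dummy>
  type bigquery

  ...

  tables accesslog1,accesslog2,accesslog3
  fetch_schema true
</match>
```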
@@ -240,3 +259,8 @@ you are still recommended to specify JSON types for TIMESTAMP fields as "time" f
 * Google API discovery expiration
 * Error classes
 * check row size limits
+
+## Authors
+
+* @tagomoris: First author, original version
+* KAIZEN platform Inc.: Maintainer, since 2014.08.19
data/fluent-plugin-bigquery.gemspec
CHANGED
@@ -6,11 +6,11 @@ require 'fluent/plugin/bigquery/version'
 Gem::Specification.new do |spec|
   spec.name          = "fluent-plugin-bigquery"
   spec.version       = Fluent::BigQueryPlugin::VERSION
-  spec.authors       = ["
-  spec.email         = ["
+  spec.authors       = ["Naoya Ito"]
+  spec.email         = ["i.naoya@gmail.com"]
   spec.description   = %q{Fluentd plugin to store data on Google BigQuery, by load, or by stream inserts}
   spec.summary       = %q{Fluentd plugin to store data on Google BigQuery}
-  spec.homepage      = "https://github.com/
+  spec.homepage      = "https://github.com/kaizenplatform/fluent-plugin-bigquery"
   spec.license       = "APLv2"
 
   spec.files         = `git ls-files`.split($/)
data/lib/fluent/plugin/out_bigquery.rb
CHANGED
@@ -62,6 +62,7 @@ module Fluent
     config_param :tables, :string, :default => nil
 
     config_param :schema_path, :string, :default => nil
+    config_param :fetch_schema, :bool, :default => false
     config_param :field_string,  :string, :default => nil
     config_param :field_integer, :string, :default => nil
     config_param :field_float,   :string, :default => nil
@@ -201,6 +202,8 @@ module Fluent
 
       @tables_queue = @tablelist.dup.shuffle
       @tables_mutex = Mutex.new
+
+      fetch_schema() if @fetch_schema
     end
 
     def shutdown
@@ -270,7 +273,7 @@ module Fluent
           log.warn "Parse error: google api error response body", :body => res.body
         end
       end
-      log.error "tabledata.insertAll API", :project_id => @
+      log.error "tabledata.insertAll API", :project_id => @project, :dataset => @dataset, :table => table_id, :code => res.status, :message => message
       raise "failed to insert into bigquery" # TODO: error class
     end
   end
@@ -307,6 +310,40 @@ module Fluent
       insert(insert_table, rows)
     end
 
+    def fetch_schema
+      table_id = @tablelist[0]
+      res = client.execute(
+        :api_method => @bq.tables.get,
+        :parameters => {
+          'projectId' => @project,
+          'datasetId' => @dataset,
+          'tableId' => table_id,
+        }
+      )
+
+      unless res.success?
+        # api_error? -> client cache clear
+        @cached_client = nil
+
+        message = res.body
+        if res.body =~ /^\{/
+          begin
+            res_obj = JSON.parse(res.body)
+            message = res_obj['error']['message'] || res.body
+          rescue => e
+            log.warn "Parse error: google api error response body", :body => res.body
+          end
+        end
+        log.error "tables.get API", :project_id => @project, :dataset => @dataset, :table => table_id, :code => res.status, :message => message
+        raise "failed to fetch schema from bigquery" # TODO: error class
+      end
+
+      res_obj = JSON.parse(res.body)
+      schema = res_obj['schema']['fields']
+      log.debug "Load schema from BigQuery: #{@project}:#{@dataset}.#{table_id} #{schema}"
+      @fields.load_schema(schema, false)
+    end
+
     # def client_oauth # not implemented
     #   raise NotImplementedError, "OAuth needs browser authentication..."
     #
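For orientation, a minimal sketch of the payload the new method consumes: BigQuery's tables.get response carries the table schema as an array of {name, type, mode} hashes under `schema.fields`, which `fetch_schema` extracts and hands to `load_schema`. Field names below are illustrative, not from the gem:

```ruby
require 'json'

# Illustrative tables.get response body: the schema lives under
# schema.fields as an array of field definitions.
res_body = JSON.generate({
  'schema' => {
    'fields' => [
      { 'name' => 'time',   'type' => 'TIMESTAMP', 'mode' => 'REQUIRED' },
      { 'name' => 'status', 'type' => 'INTEGER',   'mode' => 'NULLABLE' }
    ]
  }
})

# The same extraction fetch_schema performs before calling
# @fields.load_schema(schema, false):
schema = JSON.parse(res_body)['schema']['fields']
p schema.map { |f| f['name'] }  # => ["time", "status"]
```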
@@ -434,7 +471,7 @@ module Fluent
       @fields[name]
     end
 
-    def load_schema(schema)
+    def load_schema(schema, allow_overwrite=true)
       schema.each do |field|
         raise ConfigError, 'field must have type' unless field.key?('type')
 
@@ -445,11 +482,13 @@ module Fluent
       field_schema_class = FIELD_TYPES[type]
       raise ConfigError, "Invalid field type: #{field['type']}" unless field_schema_class
 
+      next if @fields.key?(name) and !allow_overwrite
+
       field_schema = field_schema_class.new(name, mode)
       @fields[name] = field_schema
       if type == :record
         raise ConfigError, "record field must have fields" unless field.key?('fields')
-        field_schema.load_schema(field['fields'])
+        field_schema.load_schema(field['fields'], allow_overwrite)
       end
     end
   end
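The new allow_overwrite flag is what makes fetched schemas defer to fluent.conf: fields declared in the config are registered first, then fetch_schema applies the API schema with allow_overwrite=false, so only fields not yet known are added (the test below asserts exactly this for the "time" field). A standalone sketch of that precedence rule, using a plain Hash in place of the plugin's field table:

```ruby
# Standalone sketch of the allow_overwrite precedence rule; a plain
# Hash stands in for the plugin's RecordSchema field classes.
fields = {}

load_schema = lambda do |schema, allow_overwrite|
  schema.each do |field|
    name = field['name']
    # With allow_overwrite=false, fields already registered win.
    next if fields.key?(name) && !allow_overwrite
    fields[name] = field['type'].downcase.to_sym
  end
end

# 1) Fields listed in fluent.conf, e.g. "field_integer time":
load_schema.call([{ 'name' => 'time', 'type' => 'INTEGER' }], true)
# 2) Schema fetched from BigQuery, applied without overwriting:
load_schema.call([{ 'name' => 'time', 'type' => 'TIMESTAMP' },
                  { 'name' => 'tty',  'type' => 'STRING' }], false)

p fields  # => {"time"=>:integer, "tty"=>:string}
```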
data/test/plugin/test_out_bigquery.rb
CHANGED
@@ -413,6 +413,113 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert_equal expected, MessagePack.unpack(buf)
   end
 
+  def test_format_fetch_from_bigquery_api
+    now = Time.now
+    input = [
+      now,
+      {
+        "tty" => nil,
+        "pwd" => "/home/yugui",
+        "user" => "fluentd",
+        "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+      }
+    ]
+    expected = {
+      "json" => {
+        "time" => now.to_i,
+        "pwd" => "/home/yugui",
+        "user" => "fluentd",
+        "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+      }
+    }
+
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      time_format %s
+      time_field  time
+
+      fetch_schema true
+      field_integer time
+    CONFIG
+    mock_client(driver) do |expect|
+      expect.discovered_api("bigquery", "v2") { mock!.tables.mock!.get { Object.new } }
+      expect.execute(
+        :api_method => anything,
+        :parameters => {
+          'projectId' => 'yourproject_id',
+          'datasetId' => 'yourdataset_id',
+          'tableId' => 'foo'
+        }
+      ) {
+        s = stub!
+        s.success? { true }
+        s.body {
+          JSON.generate({
+            schema: {
+              fields: [
+                {
+                  name: "time",
+                  type: "TIMESTAMP",
+                  mode: "REQUIRED"
+                },
+                {
+                  name: "tty",
+                  type: "STRING",
+                  mode: "NULLABLE"
+                },
+                {
+                  name: "pwd",
+                  type: "STRING",
+                  mode: "REQUIRED"
+                },
+                {
+                  name: "user",
+                  type: "STRING",
+                  mode: "REQUIRED"
+                },
+                {
+                  name: "argv",
+                  type: "STRING",
+                  mode: "REPEATED"
+                }
+              ]
+            }
+          })
+        }
+        s
+      }
+    end
+    driver.instance.start
+    buf = driver.instance.format_stream("my.tag", [input])
+    driver.instance.shutdown
+
+    fields = driver.instance.instance_eval{ @fields }
+    assert fields["time"]
+    assert_equal :integer,  fields["time"].type  # DO NOT OVERWRITE
+    assert_equal :nullable, fields["time"].mode  # DO NOT OVERWRITE
+
+    assert fields["tty"]
+    assert_equal :string,   fields["tty"].type
+    assert_equal :nullable, fields["tty"].mode
+
+    assert fields["pwd"]
+    assert_equal :string,   fields["pwd"].type
+    assert_equal :required, fields["pwd"].mode
+
+    assert fields["user"]
+    assert_equal :string,   fields["user"].type
+    assert_equal :required, fields["user"].mode
+
+    assert fields["argv"]
+    assert_equal :string,   fields["argv"].type
+    assert_equal :repeated, fields["argv"].mode
+  end
+
   def test_empty_value_in_required
     now = Time.now
     input = [
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.2.0
 platform: ruby
 authors:
--
+- Naoya Ito
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-08-
+date: 2014-08-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -125,7 +125,7 @@ dependencies:
 description: Fluentd plugin to store data on Google BigQuery, by load, or by stream
   inserts
 email:
--
+- i.naoya@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files: []
@@ -145,7 +145,7 @@ files:
 - test/plugin/testdata/apache.schema
 - test/plugin/testdata/sudo.schema
 - test/test_load_request_body_wrapper.rb
-homepage: https://github.com/
+homepage: https://github.com/kaizenplatform/fluent-plugin-bigquery
 licenses:
 - APLv2
 metadata: {}
@@ -175,4 +175,3 @@ test_files:
 - test/plugin/testdata/apache.schema
 - test/plugin/testdata/sudo.schema
 - test/test_load_request_body_wrapper.rb
-has_rdoc: