fluent-plugin-bigquery 0.1.0 → 0.2.0
This diff shows the changes between publicly released versions of the package, as they appear in their public registry; it is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +31 -7
- data/fluent-plugin-bigquery.gemspec +3 -3
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery.rb +42 -3
- data/test/plugin/test_out_bigquery.rb +107 -0
- metadata +5 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc7d16f7400a52c0cbf655c22b6c919572142182
+  data.tar.gz: 7c83037e999617f73a0fc6a51d41167a979b72a9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a25328702bfad9ddff282cbcd9c8d84d64e2857158c1bc7a20ce388610bb5150e969ac0f07ebd1162b01ffca29b50cf920a28c3806715512cad4a9ff6cbe4d5c
+  data.tar.gz: 9701a7f10f17e0f7dbaf61bca99d6f73e1b769deba6361935cc413c1bbc5039349c626a84309f57919e884228cc3084eb38d1fdad83fde99505a68509d171145
data/README.md
CHANGED
@@ -165,10 +165,11 @@ because there is a time lag between collection and transmission of logs.
 
 ### Table schema
 
-There are
+There are three methods to describe the schema of the target table.
 
 1. List fields in fluent.conf
 2. Load a schema file in JSON.
+3. Fetch a schema using BigQuery API
 
 The examples above use the first method. In this method,
 you can also specify nested fields by prefixing their belonging record fields.
@@ -176,12 +177,12 @@ you can also specify nested fields by prefixing their belonging record fields.
 ```apache
 <match dummy>
   type bigquery
-
+
   ...
 
   time_format %s
   time_field  time
-
+
   field_integer time,response.status,response.bytes
   field_string  request.vhost,request.path,request.method,request.protocol,request.agent,request.referer,remote.host,remote.ip,remote.user
   field_float   request.time
@@ -215,20 +216,38 @@ The second method is to specify a path to a BigQuery schema file instead of list
 ```apache
 <match dummy>
   type bigquery
-
+
   ...
-
+
   time_format %s
   time_field  time
-
+
   schema_path /path/to/httpd.schema
   field_integer time
 </match>
 ```
 where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery.
 
+The third method is to set `fetch_schema` to `true` to fetch the schema via the BigQuery API. In this case, your fluent.conf looks like:
+
+```apache
+<match dummy>
+  type bigquery
+
+  ...
+
+  time_format %s
+  time_field  time
+
+  fetch_schema true
+  field_integer time
+</match>
+```
+
+If you specify multiple tables in the configuration file, the plugin gets all schema data from BigQuery and merges it.
+
 NOTE: Since JSON does not define how to encode data of TIMESTAMP type,
-you are still recommended to specify JSON types for TIMESTAMP fields as "time" field does in the example.
+you are still recommended to specify JSON types for TIMESTAMP fields as the "time" field does in the example, if you use the second or third method.
@@ -240,3 +259,8 @@ you are still recommended to specify JSON types for TIMESTAMP fields as "time" f
 * Google API discovery expiration
 * Error classes
 * check row size limits
+
+## Authors
+
+* @tagomoris: First author, original version
+* KAIZEN platform Inc.: Maintainer, since 2014.08.19
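All three schema methods ultimately converge on the same field description format: an array of objects with `name`, `type`, and `mode` keys, whether they come from fluent.conf directives, a schema file, or the `tables.get` response mocked in the new test below. As a minimal illustrative sketch (the file contents here are hypothetical, not taken from the repository), this is the shape a schema file such as `/path/to/httpd.schema` is expected to have and how it parses:

```ruby
# Hypothetical httpd.schema contents in BigQuery's standard field format;
# a file like this is what schema_path points at.
require 'json'

schema_json = <<-SCHEMA
[
  { "name": "time",   "type": "TIMESTAMP", "mode": "REQUIRED" },
  { "name": "vhost",  "type": "STRING",    "mode": "NULLABLE" },
  { "name": "status", "type": "INTEGER",   "mode": "NULLABLE" }
]
SCHEMA

JSON.parse(schema_json).each do |field|
  puts "#{field['name']}: #{field['type']} (#{field['mode']})"
end
```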
data/fluent-plugin-bigquery.gemspec
CHANGED
@@ -6,11 +6,11 @@ require 'fluent/plugin/bigquery/version'
 Gem::Specification.new do |spec|
   spec.name          = "fluent-plugin-bigquery"
   spec.version       = Fluent::BigQueryPlugin::VERSION
-  spec.authors       = ["
-  spec.email         = ["
+  spec.authors       = ["Naoya Ito"]
+  spec.email         = ["i.naoya@gmail.com"]
   spec.description   = %q{Fluentd plugin to store data on Google BigQuery, by load, or by stream inserts}
   spec.summary       = %q{Fluentd plugin to store data on Google BigQuery}
-  spec.homepage      = "https://github.com/
+  spec.homepage      = "https://github.com/kaizenplatform/fluent-plugin-bigquery"
   spec.license       = "APLv2"
 
   spec.files         = `git ls-files`.split($/)
data/lib/fluent/plugin/out_bigquery.rb
CHANGED
@@ -62,6 +62,7 @@ module Fluent
     config_param :tables, :string, :default => nil
 
     config_param :schema_path, :string, :default => nil
+    config_param :fetch_schema, :bool, :default => false
     config_param :field_string,  :string, :default => nil
     config_param :field_integer, :string, :default => nil
     config_param :field_float,   :string, :default => nil
@@ -201,6 +202,8 @@ module Fluent
 
       @tables_queue = @tablelist.dup.shuffle
       @tables_mutex = Mutex.new
+
+      fetch_schema() if @fetch_schema
     end
 
     def shutdown
@@ -270,7 +273,7 @@ module Fluent
           log.warn "Parse error: google api error response body", :body => res.body
         end
       end
-      log.error "tabledata.insertAll API", :project_id => @
+      log.error "tabledata.insertAll API", :project_id => @project, :dataset => @dataset, :table => table_id, :code => res.status, :message => message
       raise "failed to insert into bigquery" # TODO: error class
     end
   end
@@ -307,6 +310,40 @@ module Fluent
       insert(insert_table, rows)
     end
 
+    def fetch_schema
+      table_id = @tablelist[0]
+      res = client.execute(
+        :api_method => @bq.tables.get,
+        :parameters => {
+          'projectId' => @project,
+          'datasetId' => @dataset,
+          'tableId' => table_id,
+        }
+      )
+
+      unless res.success?
+        # api_error? -> client cache clear
+        @cached_client = nil
+
+        message = res.body
+        if res.body =~ /^\{/
+          begin
+            res_obj = JSON.parse(res.body)
+            message = res_obj['error']['message'] || res.body
+          rescue => e
+            log.warn "Parse error: google api error response body", :body => res.body
+          end
+        end
+        log.error "tables.get API", :project_id => @project, :dataset => @dataset, :table => table_id, :code => res.status, :message => message
+        raise "failed to fetch schema from bigquery" # TODO: error class
+      end
+
+      res_obj = JSON.parse(res.body)
+      schema = res_obj['schema']['fields']
+      log.debug "Load schema from BigQuery: #{@project}:#{@dataset}.#{table_id} #{schema}"
+      @fields.load_schema(schema, false)
+    end
+
     # def client_oauth # not implemented
     #   raise NotImplementedError, "OAuth needs browser authentication..."
     #
@@ -434,7 +471,7 @@ module Fluent
       @fields[name]
     end
 
-    def load_schema(schema)
+    def load_schema(schema, allow_overwrite=true)
       schema.each do |field|
         raise ConfigError, 'field must have type' unless field.key?('type')
 
@@ -445,11 +482,13 @@ module Fluent
         field_schema_class = FIELD_TYPES[type]
         raise ConfigError, "Invalid field type: #{field['type']}" unless field_schema_class
 
+        next if @fields.key?(name) and !allow_overwrite
+
         field_schema = field_schema_class.new(name, mode)
         @fields[name] = field_schema
         if type == :record
           raise ConfigError, "record field must have fields" unless field.key?('fields')
-          field_schema.load_schema(field['fields'])
+          field_schema.load_schema(field['fields'], allow_overwrite)
         end
       end
     end
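The subtle part of the `fetch_schema` change is precedence: `fetch_schema` calls `@fields.load_schema(schema, false)`, and with `allow_overwrite` false, `load_schema` skips any field name already registered from fluent.conf, so an explicit `field_integer time` survives even though the fetched schema declares `time` as a REQUIRED TIMESTAMP. The test below pins this down with its "DO NOT OVERWRITE" assertions. A minimal standalone sketch of that rule, using a hypothetical `FieldTable` rather than the plugin's own classes:

```ruby
# Sketch of the allow_overwrite precedence rule (hypothetical FieldTable,
# not the plugin's RecordSchema): fluent.conf fields win over fetched ones.
class FieldTable
  attr_reader :fields

  def initialize
    @fields = {}
  end

  # Register a field explicitly, as the field_* config directives do.
  def add(name, type, mode)
    @fields[name] = { type: type, mode: mode }
  end

  # Merge a BigQuery-style schema; skip known names unless overwriting is allowed.
  def load_schema(schema, allow_overwrite = true)
    schema.each do |field|
      name = field['name']
      next if @fields.key?(name) && !allow_overwrite
      @fields[name] = { type: field['type'].downcase.to_sym,
                        mode: field['mode'].downcase.to_sym }
    end
  end
end

table = FieldTable.new
table.add('time', :integer, :nullable)   # from `field_integer time`
table.load_schema(
  [{ 'name' => 'time', 'type' => 'TIMESTAMP', 'mode' => 'REQUIRED' },
   { 'name' => 'user', 'type' => 'STRING',    'mode' => 'REQUIRED' }],
  false                                  # the fetch_schema code path
)
p table.fields
# => {"time"=>{:type=>:integer, :mode=>:nullable}, "user"=>{:type=>:string, :mode=>:required}}
```

Keeping the fluent.conf definition as the winner matters for the README's TIMESTAMP note above: a user can keep sending epoch seconds by declaring `field_integer time` even when the table's own schema says TIMESTAMP.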
data/test/plugin/test_out_bigquery.rb
CHANGED
@@ -413,6 +413,113 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert_equal expected, MessagePack.unpack(buf)
   end
 
+  def test_format_fetch_from_bigquery_api
+    now = Time.now
+    input = [
+      now,
+      {
+        "tty" => nil,
+        "pwd" => "/home/yugui",
+        "user" => "fluentd",
+        "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+      }
+    ]
+    expected = {
+      "json" => {
+        "time" => now.to_i,
+        "pwd" => "/home/yugui",
+        "user" => "fluentd",
+        "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+      }
+    }
+
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      time_format %s
+      time_field  time
+
+      fetch_schema true
+      field_integer time
+    CONFIG
+    mock_client(driver) do |expect|
+      expect.discovered_api("bigquery", "v2") { mock!.tables.mock!.get { Object.new } }
+      expect.execute(
+        :api_method => anything,
+        :parameters => {
+          'projectId' => 'yourproject_id',
+          'datasetId' => 'yourdataset_id',
+          'tableId' => 'foo'
+        }
+      ) {
+        s = stub!
+        s.success? { true }
+        s.body {
+          JSON.generate({
+            schema: {
+              fields: [
+                {
+                  name: "time",
+                  type: "TIMESTAMP",
+                  mode: "REQUIRED"
+                },
+                {
+                  name: "tty",
+                  type: "STRING",
+                  mode: "NULLABLE"
+                },
+                {
+                  name: "pwd",
+                  type: "STRING",
+                  mode: "REQUIRED"
+                },
+                {
+                  name: "user",
+                  type: "STRING",
+                  mode: "REQUIRED"
+                },
+                {
+                  name: "argv",
+                  type: "STRING",
+                  mode: "REPEATED"
+                }
+              ]
+            }
+          })
+        }
+        s
+      }
+    end
+    driver.instance.start
+    buf = driver.instance.format_stream("my.tag", [input])
+    driver.instance.shutdown
+
+    fields = driver.instance.instance_eval{ @fields }
+    assert fields["time"]
+    assert_equal :integer,  fields["time"].type  # DO NOT OVERWRITE
+    assert_equal :nullable, fields["time"].mode  # DO NOT OVERWRITE
+
+    assert fields["tty"]
+    assert_equal :string,   fields["tty"].type
+    assert_equal :nullable, fields["tty"].mode
+
+    assert fields["pwd"]
+    assert_equal :string,   fields["pwd"].type
+    assert_equal :required, fields["pwd"].mode
+
+    assert fields["user"]
+    assert_equal :string,   fields["user"].type
+    assert_equal :required, fields["user"].mode
+
+    assert fields["argv"]
+    assert_equal :string,   fields["argv"].type
+    assert_equal :repeated, fields["argv"].mode
+  end
+
   def test_empty_value_in_required
     now = Time.now
     input = [
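One shape worth noting in this test: `format_stream` is expected to produce `{"json" => {...}}` wrappers serialized with MessagePack, matching the row format `tabledata.insertAll` consumes, which is why the earlier assertions compare against `MessagePack.unpack(buf)`. A rough sketch of that round trip (assuming only the msgpack gem, not the plugin itself):

```ruby
# Each formatted event is a {"json" => record} hash -- the row shape
# tabledata.insertAll expects -- packed into the buffer with MessagePack.
require 'msgpack'

row = { "json" => { "time" => 1408492800, "user" => "fluentd" } }
buf = row.to_msgpack                 # what the buffer chunk holds per event
p MessagePack.unpack(buf) == row     # => true, as the test's asserts rely on
```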
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.2.0
 platform: ruby
 authors:
--
+- Naoya Ito
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-08-
+date: 2014-08-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -125,7 +125,7 @@ dependencies:
 description: Fluentd plugin to store data on Google BigQuery, by load, or by stream
   inserts
 email:
--
+- i.naoya@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files: []
@@ -145,7 +145,7 @@ files:
 - test/plugin/testdata/apache.schema
 - test/plugin/testdata/sudo.schema
 - test/test_load_request_body_wrapper.rb
-homepage: https://github.com/
+homepage: https://github.com/kaizenplatform/fluent-plugin-bigquery
 licenses:
 - APLv2
 metadata: {}
@@ -175,4 +175,3 @@ test_files:
 - test/plugin/testdata/apache.schema
 - test/plugin/testdata/sudo.schema
 - test/test_load_request_body_wrapper.rb
-has_rdoc: