fluent-plugin-bigquery 0.2.12 → 0.2.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +10 -3
- data/README.md +12 -2
- data/fluent-plugin-bigquery.gemspec +1 -1
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery.rb +34 -9
- data/test/plugin/test_out_bigquery.rb +134 -4
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8266112b60355067f61e514946b70ccea8b1eed4
|
4
|
+
data.tar.gz: 8bb87ae21e1391c18f51ca4ae6806e5a7e8219ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea18b5361d0789b91bd41c7838a560d1c3207158b69b5f8abc269e44dd020a26b9db6b9e58c24b84114956384cd44b147a9e9f0e0eec4af920a4779e6493bffc
|
7
|
+
data.tar.gz: 739c72ae73c4d28354c53e0bae9dc1b2f9126cd52cbe84784a36f2b69fcae42724a01c29688af15c5a3c8b891dcae909725929473c74e51ab1f1e02997ab8fd1
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -220,8 +220,18 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
|
|
220
220
|
</match>
|
221
221
|
```
|
222
222
|
|
223
|
-
|
224
|
-
|
223
|
+
The table name format can be suffixed with `@` followed by an attribute name.
|
224
|
+
|
225
|
+
```apache
|
226
|
+
<match dummy>
|
227
|
+
...
|
228
|
+
table accesslog_%Y_%m@timestamp
|
229
|
+
...
|
230
|
+
</match>
|
231
|
+
```
|
232
|
+
|
233
|
+
If an attribute name is given, the time used for formatting is taken from that attribute's value in each row, rather than from the current time.
|
234
|
+
The attribute's value must be a UNIX timestamp (seconds since the epoch).
|
225
235
|
|
226
236
|
### Dynamic table creating
|
227
237
|
|
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency "test-unit-rr", "~> 1.0.3"
|
25
25
|
|
26
26
|
spec.add_runtime_dependency "google-api-client", "~> 0.8.0"
|
27
|
-
spec.add_runtime_dependency "googleauth"
|
27
|
+
spec.add_runtime_dependency "googleauth", ">= 0.5.0"
|
28
28
|
spec.add_runtime_dependency "fluentd"
|
29
29
|
spec.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
|
30
30
|
spec.add_runtime_dependency "fluent-mixin-config-placeholders", ">= 0.3.0"
|
@@ -83,6 +83,8 @@ module Fluent
|
|
83
83
|
config_param :replace_record_key, :bool, default: false
|
84
84
|
(1..REGEXP_MAX_NUM).each {|i| config_param :"replace_record_key_regexp#{i}", :string, default: nil }
|
85
85
|
|
86
|
+
config_param :convert_hash_to_json, :bool, default: false
|
87
|
+
|
86
88
|
config_param :time_format, :string, default: nil
|
87
89
|
config_param :localtime, :bool, default: nil
|
88
90
|
config_param :utc, :bool, default: nil
|
@@ -255,11 +257,11 @@ module Fluent
|
|
255
257
|
when 'json_key'
|
256
258
|
if File.exist?(@json_key)
|
257
259
|
auth = File.open(@json_key) do |f|
|
258
|
-
Google::Auth::ServiceAccountCredentials.
|
260
|
+
Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: scope)
|
259
261
|
end
|
260
262
|
else
|
261
263
|
key = StringIO.new(@json_key)
|
262
|
-
auth = Google::Auth::ServiceAccountCredentials.
|
264
|
+
auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
|
263
265
|
end
|
264
266
|
|
265
267
|
when 'application_default'
|
@@ -276,8 +278,16 @@ module Fluent
|
|
276
278
|
@cached_client = client
|
277
279
|
end
|
278
280
|
|
279
|
-
def generate_table_id(table_id_format, current_time)
|
280
|
-
|
281
|
+
def generate_table_id(table_id_format, current_time, row)
|
282
|
+
format, col = table_id_format.split(/@/)
|
283
|
+
time = if col && row
|
284
|
+
keys = col.split('.')
|
285
|
+
t = keys.inject(row['json']) {|obj, attr| obj[attr] }
|
286
|
+
Time.at(t)
|
287
|
+
else
|
288
|
+
current_time
|
289
|
+
end
|
290
|
+
time.strftime(format)
|
281
291
|
end
|
282
292
|
|
283
293
|
def create_table(table_id)
|
@@ -318,8 +328,7 @@ module Fluent
|
|
318
328
|
end
|
319
329
|
end
|
320
330
|
|
321
|
-
def insert(
|
322
|
-
table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now))
|
331
|
+
def insert(table_id, rows)
|
323
332
|
res = client().execute(
|
324
333
|
api_method: @bq.tabledata.insert_all,
|
325
334
|
parameters: {
|
@@ -367,6 +376,15 @@ module Fluent
|
|
367
376
|
new_record
|
368
377
|
end
|
369
378
|
|
379
|
+
def convert_hash_to_json(record)
|
380
|
+
record.each do |key, value|
|
381
|
+
if value.class == Hash
|
382
|
+
record[key] = value.to_json
|
383
|
+
end
|
384
|
+
end
|
385
|
+
record
|
386
|
+
end
|
387
|
+
|
370
388
|
def format_stream(tag, es)
|
371
389
|
super
|
372
390
|
buf = ''
|
@@ -375,6 +393,10 @@ module Fluent
|
|
375
393
|
record = replace_record_key(record)
|
376
394
|
end
|
377
395
|
|
396
|
+
if @convert_hash_to_json
|
397
|
+
record = convert_hash_to_json(record)
|
398
|
+
end
|
399
|
+
|
378
400
|
row = @fields.format(@add_time_field.call(record, time))
|
379
401
|
unless row.empty?
|
380
402
|
row = {"json" => row}
|
@@ -394,17 +416,20 @@ module Fluent
|
|
394
416
|
|
395
417
|
# TODO: method
|
396
418
|
|
397
|
-
|
419
|
+
insert_table_format = @tables_mutex.synchronize do
|
398
420
|
t = @tables_queue.shift
|
399
421
|
@tables_queue.push t
|
400
422
|
t
|
401
423
|
end
|
402
|
-
|
424
|
+
|
425
|
+
rows.group_by {|row| generate_table_id(insert_table_format, Time.at(Fluent::Engine.now), row) }.each do |table_id, rows|
|
426
|
+
insert(table_id, rows)
|
427
|
+
end
|
403
428
|
end
|
404
429
|
|
405
430
|
def fetch_schema
|
406
431
|
table_id_format = @tablelist[0]
|
407
|
-
table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now))
|
432
|
+
table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now), nil)
|
408
433
|
res = client.execute(
|
409
434
|
api_method: @bq.tables.get,
|
410
435
|
parameters: {
|
@@ -105,7 +105,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
105
105
|
json_key_path = 'test/plugin/testdata/json_key.json'
|
106
106
|
authorization = Object.new
|
107
107
|
mock(authorization).fetch_access_token!
|
108
|
-
mock(Google::Auth::ServiceAccountCredentials).
|
108
|
+
mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: File.open(json_key_path), scope: API_SCOPE) { authorization }
|
109
109
|
|
110
110
|
mock.proxy(Google::APIClient).new.with_any_args {
|
111
111
|
mock!.__send__(:authorization=, authorization) {}
|
@@ -128,7 +128,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
128
128
|
mock(StringIO).new(json_key) { json_key_io }
|
129
129
|
authorization = Object.new
|
130
130
|
mock(authorization).fetch_access_token!
|
131
|
-
mock(Google::Auth::ServiceAccountCredentials).
|
131
|
+
mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: json_key_io, scope: API_SCOPE) { authorization }
|
132
132
|
|
133
133
|
mock.proxy(Google::APIClient).new.with_any_args {
|
134
134
|
mock!.__send__(:authorization=, authorization) {}
|
@@ -820,6 +820,60 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
820
820
|
assert_equal expected, MessagePack.unpack(buf)
|
821
821
|
end
|
822
822
|
|
823
|
+
def test_convert_hash_to_json
|
824
|
+
now = Time.now
|
825
|
+
input = [
|
826
|
+
now,
|
827
|
+
{
|
828
|
+
"vhost" => :bar,
|
829
|
+
"referer" => "http://referer.example",
|
830
|
+
"bot_access" => true,
|
831
|
+
"loginsession" => false,
|
832
|
+
"remote" => {
|
833
|
+
"host" => "remote.example",
|
834
|
+
"ip" => "192.0.2.1",
|
835
|
+
"port" => 12345,
|
836
|
+
"user" => "tagomoris",
|
837
|
+
}
|
838
|
+
}
|
839
|
+
]
|
840
|
+
expected = {
|
841
|
+
"json" => {
|
842
|
+
"time" => now.to_i,
|
843
|
+
"vhost" => "bar",
|
844
|
+
"referer" => "http://referer.example",
|
845
|
+
"bot_access" => true,
|
846
|
+
"loginsession" => false,
|
847
|
+
"remote" => "{\"host\":\"remote.example\",\"ip\":\"192.0.2.1\",\"port\":12345,\"user\":\"tagomoris\"}"
|
848
|
+
}
|
849
|
+
}
|
850
|
+
|
851
|
+
driver = create_driver(<<-CONFIG)
|
852
|
+
table foo
|
853
|
+
email foo@bar.example
|
854
|
+
private_key_path /path/to/key
|
855
|
+
project yourproject_id
|
856
|
+
dataset yourdataset_id
|
857
|
+
|
858
|
+
convert_hash_to_json true
|
859
|
+
|
860
|
+
time_format %s
|
861
|
+
time_field time
|
862
|
+
|
863
|
+
field_integer time
|
864
|
+
field_string vhost, referer, remote
|
865
|
+
field_boolean bot_access, loginsession
|
866
|
+
CONFIG
|
867
|
+
mock_client(driver) do |expect|
|
868
|
+
expect.discovered_api("bigquery", "v2") { stub! }
|
869
|
+
end
|
870
|
+
driver.instance.start
|
871
|
+
buf = driver.instance.format_stream("my.tag", [input])
|
872
|
+
driver.instance.shutdown
|
873
|
+
|
874
|
+
assert_equal expected, MessagePack.unpack(buf)
|
875
|
+
end
|
876
|
+
|
823
877
|
def test_write
|
824
878
|
entry = {"json" => {"a" => "b"}}, {"json" => {"b" => "c"}}
|
825
879
|
driver = create_driver(CONFIG)
|
@@ -846,14 +900,90 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
846
900
|
driver.instance.shutdown
|
847
901
|
end
|
848
902
|
|
849
|
-
def
|
903
|
+
def test_write_with_row_based_table_id_formatting
|
904
|
+
entry = [
|
905
|
+
{"json" => {"a" => "b", "created_at" => Time.local(2014,8,20,9,0,0).to_i}},
|
906
|
+
{"json" => {"b" => "c", "created_at" => Time.local(2014,8,21,9,0,0).to_i}}
|
907
|
+
]
|
908
|
+
driver = create_driver(<<-CONFIG)
|
909
|
+
table foo_%Y_%m_%d@created_at
|
910
|
+
email foo@bar.example
|
911
|
+
private_key_path /path/to/key
|
912
|
+
project yourproject_id
|
913
|
+
dataset yourdataset_id
|
914
|
+
|
915
|
+
time_format %s
|
916
|
+
time_field time
|
917
|
+
|
918
|
+
field_integer time,status,bytes
|
919
|
+
field_string vhost,path,method,protocol,agent,referer,remote.host,remote.ip,remote.user
|
920
|
+
field_float requesttime
|
921
|
+
field_boolean bot_access,loginsession
|
922
|
+
CONFIG
|
923
|
+
mock_client(driver) do |expect|
|
924
|
+
expect.discovered_api("bigquery", "v2") { mock!.tabledata.times(2).mock!.insert_all.times(2) { Object.new } }
|
925
|
+
|
926
|
+
expect.execute(
|
927
|
+
:api_method => anything,
|
928
|
+
:parameters => {
|
929
|
+
'projectId' => 'yourproject_id',
|
930
|
+
'datasetId' => 'yourdataset_id',
|
931
|
+
'tableId' => 'foo_2014_08_20',
|
932
|
+
},
|
933
|
+
:body_object => {
|
934
|
+
'rows' => [entry[0]]
|
935
|
+
}
|
936
|
+
) { stub!.success? { true } }
|
937
|
+
|
938
|
+
expect.execute(
|
939
|
+
:api_method => anything,
|
940
|
+
:parameters => {
|
941
|
+
'projectId' => 'yourproject_id',
|
942
|
+
'datasetId' => 'yourdataset_id',
|
943
|
+
'tableId' => 'foo_2014_08_21',
|
944
|
+
},
|
945
|
+
:body_object => {
|
946
|
+
'rows' => [entry[1]]
|
947
|
+
}
|
948
|
+
) { stub!.success? { true } }
|
949
|
+
end
|
950
|
+
|
951
|
+
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
952
|
+
entry.each do |object|
|
953
|
+
chunk << object.to_msgpack
|
954
|
+
end
|
955
|
+
|
956
|
+
driver.instance.start
|
957
|
+
driver.instance.write(chunk)
|
958
|
+
driver.instance.shutdown
|
959
|
+
end
|
960
|
+
|
961
|
+
def test_generate_table_id_without_row
|
850
962
|
driver = create_driver
|
851
963
|
table_id_format = 'foo_%Y_%m_%d'
|
852
964
|
time = Time.local(2014, 8, 11, 21, 20, 56)
|
853
|
-
table_id = driver.instance.generate_table_id(table_id_format, time)
|
965
|
+
table_id = driver.instance.generate_table_id(table_id_format, time, nil)
|
854
966
|
assert_equal 'foo_2014_08_11', table_id
|
855
967
|
end
|
856
968
|
|
969
|
+
def test_generate_table_id_with_row
|
970
|
+
driver = create_driver
|
971
|
+
table_id_format = 'foo_%Y_%m_%d@created_at'
|
972
|
+
time = Time.local(2014, 8, 11, 21, 20, 56)
|
973
|
+
row = { "json" => { "created_at" => Time.local(2014,8,10,21,20,57).to_i } }
|
974
|
+
table_id = driver.instance.generate_table_id(table_id_format, time, row)
|
975
|
+
assert_equal 'foo_2014_08_10', table_id
|
976
|
+
end
|
977
|
+
|
978
|
+
def test_generate_table_id_with_row_nested_attribute
|
979
|
+
driver = create_driver
|
980
|
+
table_id_format = 'foo_%Y_%m_%d@foo.bar.created_at'
|
981
|
+
time = Time.local(2014, 8, 11, 21, 20, 56)
|
982
|
+
row = { "json" => { "foo" => { "bar" => { "created_at" => Time.local(2014,8,10,21,20,57).to_i } } } }
|
983
|
+
table_id = driver.instance.generate_table_id(table_id_format, time, row)
|
984
|
+
assert_equal 'foo_2014_08_10', table_id
|
985
|
+
end
|
986
|
+
|
857
987
|
def test_auto_create_table_by_bigquery_api
|
858
988
|
now = Time.now
|
859
989
|
message = {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naoya Ito
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -86,14 +86,14 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
89
|
+
version: 0.5.0
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
96
|
+
version: 0.5.0
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: fluentd
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|