fluent-plugin-bigquery 1.2.0 → 2.0.0.beta

Sign up to get free protection for your applications and to get access to all the features.
@@ -10,7 +10,9 @@ require 'fluent/plugin/buf_memory'
10
10
  require 'fluent/plugin/buf_file'
11
11
  require 'fluent/test/driver/output'
12
12
 
13
- require 'fluent/plugin/out_bigquery'
13
+ require 'fluent/plugin/out_bigquery_base'
14
+ require 'fluent/plugin/out_bigquery_insert'
15
+ require 'fluent/plugin/out_bigquery_load'
14
16
  require 'google/apis/bigquery_v2'
15
17
  require 'google/api_client/auth/key_utils'
16
18
  require 'googleauth'
@@ -0,0 +1,579 @@
1
+ require 'helper'
2
+
3
# Tests for Fluent::Plugin::BigQueryBaseOutput covering three areas:
# table/tables configuration, selection of the authentication method, and
# the JSON produced by #format for static, file-based and fetched schemas.
class BigQueryBaseOutputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  # Default plugin configuration used by create_driver. It injects the event
  # time as epoch seconds under the "time" key and declares an inline schema.
  CONFIG = %[
    table foo
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    <inject>
    time_format %s
    time_key time
    </inject>

    schema [
      {"name": "time", "type": "INTEGER"},
      {"name": "status", "type": "INTEGER"},
      {"name": "bytes", "type": "INTEGER"},
      {"name": "vhost", "type": "STRING"},
      {"name": "path", "type": "STRING"},
      {"name": "method", "type": "STRING"},
      {"name": "protocol", "type": "STRING"},
      {"name": "agent", "type": "STRING"},
      {"name": "referer", "type": "STRING"},
      {"name": "remote", "type": "RECORD", "fields": [
        {"name": "host", "type": "STRING"},
        {"name": "ip", "type": "STRING"},
        {"name": "user", "type": "STRING"}
      ]},
      {"name": "requesttime", "type": "FLOAT"},
      {"name": "bot_access", "type": "BOOLEAN"},
      {"name": "loginsession", "type": "BOOLEAN"}
    ]
  ]

  # OAuth scope expected when service-account credentials are created.
  API_SCOPE = "https://www.googleapis.com/auth/bigquery"

  # Builds an output test driver for BigQueryBaseOutput configured with +conf+.
  def create_driver(conf = CONFIG)
    Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryBaseOutput).configure(conf)
  end

  # Intercepts Fluent::BigQuery::Writer.new through an RR proxy stub. The
  # constructed writer optionally has get_auth stubbed to return nil, is
  # yielded to the caller's block for further stubbing/mocking, and is then
  # returned as the result of Writer.new.
  def stub_writer(stub_auth: true)
    stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
      stub(writer).get_auth { nil } if stub_auth
      yield writer
      writer
    end
  end

  # Canned table resource used by the fetch_schema tests; the tests pass its
  # "schema"/"fields" list back from the mocked Writer#fetch_schema.
  private def sudo_schema_response
    {
      "schema" => {
        "fields" => [
          {
            "name" => "time",
            "type" => "TIMESTAMP",
            "mode" => "REQUIRED"
          },
          {
            "name" => "tty",
            "type" => "STRING",
            "mode" => "NULLABLE"
          },
          {
            "name" => "pwd",
            "type" => "STRING",
            "mode" => "REQUIRED"
          },
          {
            "name" => "user",
            "type" => "STRING",
            "mode" => "REQUIRED"
          },
          {
            "name" => "argv",
            "type" => "STRING",
            "mode" => "REPEATED"
          }
        ]
      }
    }
  end

  # Asserts that +table_schema+ contains every field of sudo_schema_response
  # with the expected parsed type and mode symbols.
  private def assert_sudo_schema(table_schema)
    {
      "time" => [:timestamp, :required],
      "tty"  => [:string, :nullable],
      "pwd"  => [:string, :required],
      "user" => [:string, :required],
      "argv" => [:string, :repeated],
    }.each do |name, (type, mode)|
      assert table_schema[name]
      assert_equal type, table_schema[name].type
      assert_equal mode, table_schema[name].mode
    end
  end

  # 'table' and 'tables' are each accepted alone; giving both is a ConfigError.
  def test_configure_table
    driver = create_driver
    assert_equal 'foo', driver.instance.table
    assert_nil driver.instance.tables

    driver = create_driver(CONFIG.sub(/\btable\s+.*$/, 'tables foo,bar'))
    assert_nil driver.instance.table
    assert_equal ['foo', 'bar'], driver.instance.tables

    assert_raise(Fluent::ConfigError, "'table' or 'tables' must be specified, and both are invalid") {
      create_driver(CONFIG + "tables foo,bar")
    }
  end

  # The default configuration must authenticate via get_auth_from_private_key.
  def test_configure_auth_private_key
    driver = create_driver
    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_private_key { stub! }
    end
    assert_kind_of Google::Apis::BigqueryV2::BigqueryService, driver.instance.writer.client
  end

  # auth_method compute_engine must authenticate via get_auth_from_compute_engine.
  def test_configure_auth_compute_engine
    driver = create_driver(%[
      table foo
      auth_method compute_engine
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_compute_engine { stub! }
    end
    assert_kind_of Google::Apis::BigqueryV2::BigqueryService, driver.instance.writer.client
  end

  # auth_method json_key must authenticate via get_auth_from_json_key. The
  # method is mocked without a proxy, so the named key file is never opened.
  def test_configure_auth_json_key_as_file
    driver = create_driver(%[
      table foo
      auth_method json_key
      json_key jsonkey.json
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_json_key { stub! }
    end
    assert_kind_of Google::Apis::BigqueryV2::BigqueryService, driver.instance.writer.client
  end

  # Building the client must raise Errno::EACCES when the directory holding
  # the JSON key has no permission bits set.
  def test_configure_auth_json_key_as_file_raise_permission_error
    # The superuser bypasses permission bits, so EACCES can never be raised.
    omit_if(Process.uid.zero?, "permission bits are not enforced for the superuser")

    json_key_path = 'test/plugin/testdata/json_key.json'
    json_key_path_dir = File.dirname(json_key_path)
    # Remember the real mode so the fixture directory is restored exactly.
    original_mode = File.stat(json_key_path_dir).mode & 0o7777

    begin
      File.chmod(0000, json_key_path_dir)

      driver = create_driver(%[
        table foo
        auth_method json_key
        json_key #{json_key_path}
        project yourproject_id
        dataset yourdataset_id
        schema [
          {"name": "time", "type": "INTEGER"},
          {"name": "status", "type": "INTEGER"},
          {"name": "bytes", "type": "INTEGER"}
        ]
      ])
      assert_raise(Errno::EACCES) do
        driver.instance.writer.client
      end
    ensure
      File.chmod(original_mode, json_key_path_dir)
    end
  end

  # json_key may hold the key document itself instead of a path; the document
  # must reach ServiceAccountCredentials.make_creds as an IO with API_SCOPE.
  def test_configure_auth_json_key_as_string
    # NOTE(review): the 255-character local part presumably makes the value
    # too long to be a valid file name - confirm against the plugin.
    json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
    # Parse once up front: a shared IO would be drained by the first match.
    expected_key = JSON.parse(json_key)
    authorization = Object.new
    stub(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == expected_key }, scope: API_SCOPE) { authorization }

    driver = create_driver(%[
      table foo
      auth_method json_key
      json_key #{json_key}
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])
    stub_writer(stub_auth: false) do |writer|
      mock.proxy(writer).get_auth_from_json_key { stub! }
    end
    assert_kind_of Google::Apis::BigqueryV2::BigqueryService, driver.instance.writer.client
  end

  # auth_method application_default must authenticate via
  # get_auth_from_application_default.
  def test_configure_auth_application_default
    driver = create_driver(%[
      table foo
      auth_method application_default
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock.proxy(writer).get_auth_from_application_default { stub! }
    end
    assert_kind_of Google::Apis::BigqueryV2::BigqueryService, driver.instance.writer.client
  end

  # With the inline schema, #format coerces values to the declared types and
  # keeps keys that the schema does not mention.
  def test_format
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "status" => "1",
      "bytes" => 3.0,
      "vhost" => :bar,
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/0.9",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "requesttime" => (now - 1).to_f.to_s,
      "bot_access" => true,
      "loginsession" => false,
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      }
    }
    expected = {
      "time" => now.to_i,
      "status" => 1,
      "bytes" => 3,
      "vhost" => "bar",
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/0.9",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "requesttime" => (now - 1).to_f.to_s.to_f,
      "bot_access" => true,
      "loginsession" => false,
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      }
    }

    driver = create_driver(CONFIG)
    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  # One test method is generated per row of this table.
  [
    # <time_format>, <time expectation generator>, <assertion>
    [
      "%s.%6N",
      lambda{|t| t.strftime("%s.%6N").to_f },
      lambda{|recv, expected, actual|
        recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
      }
    ],
    [
      "%Y-%m-%dT%H:%M:%S%:z",
      lambda{|t| t.iso8601 },
      :assert_equal.to_proc
    ],
  ].each do |format, expect_time, assertion|
    define_method("test_time_formats_#{format}") do
      now = Fluent::Engine.now
      input = {}
      expected = { "time" => expect_time[Time.at(now.to_r)] }

      driver = create_driver(<<-CONFIG)
        table foo
        email foo@bar.example
        private_key_path /path/to/key
        project yourproject_id
        dataset yourdataset_id

        <inject>
        time_format #{format}
        time_type string
        time_key time
        </inject>

        schema [
          {"name": "metadata", "type": "RECORD", "fields": [
            {"name": "time", "type": "INTEGER"},
            {"name": "node", "type": "STRING"}
          ]},
          {"name": "log", "type": "STRING"}
        ]
      CONFIG

      buf = nil
      driver.run { buf = driver.instance.format("my.tag", now, input) }

      # The assertion callable takes the test case as its explicit receiver.
      assertion.call(self, expected["time"], MultiJson.load(buf)["time"])
    end
  end

  # Formats a nested record using the schema loaded from testdata/apache.schema.
  def test_format_with_schema
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "request" => {
        "vhost" => :bar,
        "path" => "/path/to/baz",
        "method" => "GET",
        "protocol" => "HTTP/0.9",
        "agent" => "libwww",
        "referer" => "http://referer.example",
        "time" => (now - 1).to_f,
        "bot_access" => true,
        "loginsession" => false,
      },
      "response" => {
        "status" => "1",
        "bytes" => 3.0,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      },
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
    }
    expected = {
      "time" => now.to_f,
      "request" => {
        "vhost" => "bar",
        "path" => "/path/to/baz",
        "method" => "GET",
        "protocol" => "HTTP/0.9",
        "agent" => "libwww",
        "referer" => "http://referer.example",
        "time" => (now - 1).to_f,
        "bot_access" => true,
        "loginsession" => false,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      },
      "response" => {
        "status" => 1,
        "bytes" => 3,
      },
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  # Formats a record with a repeated field using testdata/sudo.schema; the
  # nil "tty" value is expected to be absent from the output.
  def test_format_repeated_field_with_schema
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_f,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  # With fetch_schema enabled, the schema is requested for the configured
  # table and stored in @fetched_schemas under "project.dataset.table".
  def test_format_fetch_from_bigquery_api
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_i,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      fetch_schema true
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    stub_writer do |writer|
      mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
        sudo_schema_response["schema"]["fields"]
      end
    end

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)

    table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
    assert_sudo_schema(table_schema)
  end

  # With fetch_schema_table set, the schema is requested for that table
  # ("foo") rather than for the time-templated destination table name.
  def test_format_fetch_from_bigquery_api_with_fetch_schema_table
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_i,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo_%Y_%m_%d
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      fetch_schema true
      fetch_schema_table foo
      schema [{"name": "time", "type": "INTEGER"}]

      <buffer time>
        timekey 1d
      </buffer>
    CONFIG

    stub_writer do |writer|
      mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
        sudo_schema_response["schema"]["fields"]
      end
    end

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)

    table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
    assert_sudo_schema(table_schema)
  end
end