fluent-plugin-bigquery 1.2.0 → 2.0.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,9 @@ require 'fluent/plugin/buf_memory'
10
10
  require 'fluent/plugin/buf_file'
11
11
  require 'fluent/test/driver/output'
12
12
 
13
- require 'fluent/plugin/out_bigquery'
13
+ require 'fluent/plugin/out_bigquery_base'
14
+ require 'fluent/plugin/out_bigquery_insert'
15
+ require 'fluent/plugin/out_bigquery_load'
14
16
  require 'google/apis/bigquery_v2'
15
17
  require 'google/api_client/auth/key_utils'
16
18
  require 'googleauth'
@@ -0,0 +1,579 @@
1
+ require 'helper'
2
+
3
+ class BigQueryBaseOutputTest < Test::Unit::TestCase
4
# test-unit hook: runs Fluent::Test.setup so each test starts from the
# Fluentd test harness' initial state.
def setup
  Fluent::Test.setup
end
7
+
8
# Baseline plugin configuration used by create_driver when no other config is
# given: a single target table, private-key credentials, project/dataset ids,
# an <inject> section writing the event time into "time" with the %s format,
# and an inline schema that includes one nested RECORD column ("remote").
CONFIG = %[
  table foo
  email foo@bar.example
  private_key_path /path/to/key
  project yourproject_id
  dataset yourdataset_id

  <inject>
  time_format %s
  time_key time
  </inject>

  schema [
    {"name": "time", "type": "INTEGER"},
    {"name": "status", "type": "INTEGER"},
    {"name": "bytes", "type": "INTEGER"},
    {"name": "vhost", "type": "STRING"},
    {"name": "path", "type": "STRING"},
    {"name": "method", "type": "STRING"},
    {"name": "protocol", "type": "STRING"},
    {"name": "agent", "type": "STRING"},
    {"name": "referer", "type": "STRING"},
    {"name": "remote", "type": "RECORD", "fields": [
      {"name": "host", "type": "STRING"},
      {"name": "ip", "type": "STRING"},
      {"name": "user", "type": "STRING"}
    ]},
    {"name": "requesttime", "type": "FLOAT"},
    {"name": "bot_access", "type": "BOOLEAN"},
    {"name": "loginsession", "type": "BOOLEAN"}
  ]
]
40
+
41
# OAuth scope string the tests expect to be passed as +scope:+ when
# credentials are created.
API_SCOPE = 'https://www.googleapis.com/auth/bigquery'
42
+
43
# Builds an output test driver around Fluent::Plugin::BigQueryBaseOutput and
# configures it.
#
# @param conf [String] plugin configuration text (defaults to CONFIG)
# @return whatever the driver's #configure returns
def create_driver(conf = CONFIG)
  output_driver = Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryBaseOutput)
  output_driver.configure(conf)
end
46
+
47
# Wraps Fluent::BigQuery::Writer.new (called with any arguments) in an rr
# proxy stub so the test can decorate every writer the plugin creates.
#
# @param stub_auth [Boolean] when true, the new writer's #get_auth is stubbed
#   to return nil
# @yield [writer] the freshly built writer, so the caller can add its own
#   mocks/stubs before the plugin receives it
def stub_writer(stub_auth: true)
  stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |built_writer|
    if stub_auth
      stub(built_writer).get_auth { nil }
    end
    yield built_writer
    # The block's value is what the proxied +new+ hands back to its caller.
    built_writer
  end
end
54
+
55
# Canned table-schema payload used by the fetch_schema tests.
#
# @return [Hash] {"schema" => {"fields" => [...]}} where each field is a Hash
#   with the string keys "name", "type" and "mode"
private def sudo_schema_response
  # One row per column: name, type, mode.
  column_specs = [
    %w[time TIMESTAMP REQUIRED],
    %w[tty  STRING    NULLABLE],
    %w[pwd  STRING    REQUIRED],
    %w[user STRING    REQUIRED],
    %w[argv STRING    REPEATED],
  ]

  field_list = column_specs.map do |name, type, mode|
    { "name" => name, "type" => type, "mode" => mode }
  end

  { "schema" => { "fields" => field_list } }
end
88
+
89
# Checks the table/tables configuration rules: `table` alone sets #table,
# `tables` alone sets #tables (comma-split), and giving both is rejected with
# Fluent::ConfigError.
def test_configure_table
  driver = create_driver
  # assert_equal takes (expected, actual); keeping that order makes failure
  # output label the two values correctly.
  assert_equal 'foo', driver.instance.table
  assert_nil driver.instance.tables

  driver = create_driver(CONFIG.sub(/\btable\s+.*$/, 'tables foo,bar'))
  assert_nil driver.instance.table
  assert_equal ['foo', 'bar'], driver.instance.tables

  # NOTE(review): the trailing string is presumably test-unit's failure
  # message, not a matcher for the exception text - confirm.
  assert_raise(Fluent::ConfigError, "'table' or 'tables' must be specified, and both are invalid") {
    create_driver(CONFIG + "tables foo,bar")
  }
end
102
+
103
# With the default CONFIG (email + private_key_path) the writer must obtain
# credentials through #get_auth_from_private_key and expose a
# Google::Apis::BigqueryV2::BigqueryService client.
def test_configure_auth_private_key
  driver = create_driver

  # Install the mock before the plugin builds its writer below.
  stub_writer(stub_auth: false) do |writer|
    mock(writer).get_auth_from_private_key { stub! }
  end

  client = driver.instance.writer.client
  assert client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
end
110
+
111
# With `auth_method compute_engine` the writer must obtain credentials through
# #get_auth_from_compute_engine and expose a BigqueryService client.
def test_configure_auth_compute_engine
  conf = %[
    table foo
    auth_method compute_engine
    project yourproject_id
    dataset yourdataset_id
    schema [
      {"name": "time", "type": "INTEGER"},
      {"name": "status", "type": "INTEGER"},
      {"name": "bytes", "type": "INTEGER"}
    ]
  ]
  driver = create_driver(conf)

  stub_writer(stub_auth: false) do |writer|
    mock(writer).get_auth_from_compute_engine { stub! }
  end

  client = driver.instance.writer.client
  assert client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
end
129
+
130
# With `auth_method json_key` and a path-like json_key value the writer must
# obtain credentials through #get_auth_from_json_key. That method is mocked
# here, so the configured file name is not expected to be opened.
def test_configure_auth_json_key_as_file
  conf = %[
    table foo
    auth_method json_key
    json_key jsonkey.josn
    project yourproject_id
    dataset yourdataset_id
    schema [
      {"name": "time", "type": "INTEGER"},
      {"name": "status", "type": "INTEGER"},
      {"name": "bytes", "type": "INTEGER"}
    ]
  ]
  driver = create_driver(conf)

  stub_writer(stub_auth: false) do |writer|
    mock(writer).get_auth_from_json_key { stub! }
  end

  client = driver.instance.writer.client
  assert client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
end
149
+
150
# When the directory holding the JSON key file is not accessible, building the
# client must surface Errno::EACCES.
#
# The fixture directory's permissions are removed for the duration of the test
# and then restored to whatever they were beforehand.
# NOTE(review): a process that bypasses permission checks (e.g. root) will
# presumably not get EACCES here - confirm how CI runs this test.
def test_configure_auth_json_key_as_file_raise_permission_error
  json_key_path = 'test/plugin/testdata/json_key.json'
  json_key_path_dir = File.dirname(json_key_path)
  # Capture the real permission bits up front so the ensure clause restores
  # exactly those instead of forcing a hard-coded 0755 onto the directory.
  original_mode = File.stat(json_key_path_dir).mode & 0o7777

  begin
    File.chmod(0o000, json_key_path_dir)

    driver = create_driver(%[
      table foo
      auth_method json_key
      json_key #{json_key_path}
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])
    assert_raises(Errno::EACCES) do
      driver.instance.writer.client
    end
  ensure
    File.chmod(original_mode, json_key_path_dir)
  end
end
176
+
177
# With `auth_method json_key` and an inline JSON document as json_key, the
# real #get_auth_from_json_key runs (mock.proxy) and must call
# Google::Auth::ServiceAccountCredentials.make_creds with an IO whose content
# parses to the same JSON, plus the BigQuery scope.
def test_configure_auth_json_key_as_string
  json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
  # Parse the expectation once. Reading it from a shared StringIO inside the
  # matcher would yield "" on any evaluation after the first.
  expected_key = JSON.parse(json_key)
  authorization = Object.new
  stub(Google::Auth::ServiceAccountCredentials).make_creds(
    json_key_io: satisfy { |arg| JSON.parse(arg.read) == expected_key },
    scope: API_SCOPE
  ) { authorization }

  driver = create_driver(%[
    table foo
    auth_method json_key
    json_key #{json_key}
    project yourproject_id
    dataset yourdataset_id
    schema [
      {"name": "time", "type": "INTEGER"},
      {"name": "status", "type": "INTEGER"},
      {"name": "bytes", "type": "INTEGER"}
    ]
  ])
  stub_writer(stub_auth: false) do |writer|
    mock.proxy(writer).get_auth_from_json_key { stub! }
  end
  assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
end
200
+
201
# With `auth_method application_default` the writer must go through
# #get_auth_from_application_default (proxied mock) and expose a
# BigqueryService client.
def test_configure_auth_application_default
  conf = %[
    table foo
    auth_method application_default
    project yourproject_id
    dataset yourdataset_id
    schema [
      {"name": "time", "type": "INTEGER"},
      {"name": "status", "type": "INTEGER"},
      {"name": "bytes", "type": "INTEGER"}
    ]
  ]
  driver = create_driver(conf)

  stub_writer(stub_auth: false) do |writer|
    mock.proxy(writer).get_auth_from_application_default { stub! }
  end

  client = driver.instance.writer.client
  assert client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
end
219
+
220
# Formats one record with the default CONFIG and checks the JSON produced by
# #format: the event time is injected as an integer "time", schema columns are
# coerced ("1" -> 1, 3.0 -> 3, :bar -> "bar", numeric string -> Float), and
# keys that are not in the schema are still present in the output.
def test_format
  event_time = Fluent::EventTime.new(Time.now.to_i)
  request_time = (event_time - 1).to_f.to_s

  record = {
    "status" => "1",
    "bytes" => 3.0,
    "vhost" => :bar,
    "path" => "/path/to/baz",
    "method" => "GET",
    "protocol" => "HTTP/0.9",
    "agent" => "libwww",
    "referer" => "http://referer.example",
    "requesttime" => request_time,
    "bot_access" => true,
    "loginsession" => false,
    "something-else" => "would be ignored",
    "yet-another" => {
      "foo" => "bar",
      "baz" => 1,
    },
    "remote" => {
      "host" => "remote.example",
      "ip" => "192.0.2.1",
      "port" => 12345,
      "user" => "tagomoris",
    }
  }

  # Written out in full (not derived from +record+) so the expectation shares
  # no objects with the hash handed to the plugin.
  expected = {
    "time" => event_time.to_i,
    "status" => 1,
    "bytes" => 3,
    "vhost" => "bar",
    "path" => "/path/to/baz",
    "method" => "GET",
    "protocol" => "HTTP/0.9",
    "agent" => "libwww",
    "referer" => "http://referer.example",
    "requesttime" => request_time.to_f,
    "bot_access" => true,
    "loginsession" => false,
    "something-else" => "would be ignored",
    "yet-another" => {
      "foo" => "bar",
      "baz" => 1,
    },
    "remote" => {
      "host" => "remote.example",
      "ip" => "192.0.2.1",
      "port" => 12345,
      "user" => "tagomoris",
    }
  }

  driver = create_driver(CONFIG)
  formatted = nil
  driver.run do
    formatted = driver.instance.format("my.tag", event_time, record)
  end

  assert_equal expected, MultiJson.load(formatted)
end
278
+
279
# Generates one test per injected time format.
# Each row is: <time_format>, <expected-value generator>, <assertion callable>.
# The assertion callable is invoked with (test_case, expected, actual).
[
  [
    "%s.%6N",
    ->(t) { t.strftime("%s.%6N").to_f },
    ->(recv, expected, actual) {
      recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
    }
  ],
  [
    "%Y-%m-%dT%H:%M:%S%:z",
    ->(t) { t.iso8601 },
    :assert_equal.to_proc
  ],
].each do |format, expect_time, check|
  define_method("test_time_formats_#{format}") do
    now = Fluent::Engine.now
    record = {}
    expected_time = expect_time.call(Time.at(now.to_r))
    expected = { "time" => expected_time }

    conf = <<-CONFIG
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format #{format}
      time_type string
      time_key time
      </inject>

      schema [
        {"name": "metadata", "type": "RECORD", "fields": [
          {"name": "time", "type": "INTEGER"},
          {"name": "node", "type": "STRING"}
        ]},
        {"name": "log", "type": "STRING"}
      ]
    CONFIG
    driver = create_driver(conf)

    formatted = nil
    driver.run do
      formatted = driver.instance.format("my.tag", now, record)
    end

    check.call(self, expected["time"], MultiJson.load(formatted)["time"])
  end
end
327
+
328
# Formats a nested record against the schema loaded from
# testdata/apache.schema (via schema_path) and checks the coerced JSON output.
def test_format_with_schema
  event_time = Fluent::EventTime.new(Time.now.to_i)
  previous_second = (event_time - 1).to_f

  record = {
    "request" => {
      "vhost" => :bar,
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/0.9",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "time" => previous_second,
      "bot_access" => true,
      "loginsession" => false,
    },
    "response" => {
      "status" => "1",
      "bytes" => 3.0,
    },
    "remote" => {
      "host" => "remote.example",
      "ip" => "192.0.2.1",
      "port" => 12345,
      "user" => "tagomoris",
    },
    "something-else" => "would be ignored",
    "yet-another" => {
      "foo" => "bar",
      "baz" => 1,
    },
  }

  expected = {
    "time" => event_time.to_f,
    "request" => {
      "vhost" => "bar",
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/0.9",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "time" => previous_second,
      "bot_access" => true,
      "loginsession" => false,
    },
    "remote" => {
      "host" => "remote.example",
      "ip" => "192.0.2.1",
      "port" => 12345,
      "user" => "tagomoris",
    },
    "response" => {
      "status" => 1,
      "bytes" => 3,
    },
    "something-else" => "would be ignored",
    "yet-another" => {
      "foo" => "bar",
      "baz" => 1,
    },
  }

  conf = <<-CONFIG
    table foo
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    <inject>
    time_format %s
    time_key time
    </inject>

    schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
    schema [{"name": "time", "type": "INTEGER"}]
  CONFIG
  driver = create_driver(conf)

  formatted = nil
  driver.run do
    formatted = driver.instance.format("my.tag", event_time, record)
  end

  assert_equal expected, MultiJson.load(formatted)
end
409
+
410
# Formats a record against testdata/sudo.schema (via schema_path): the array
# value "argv" must survive as an array and the nil "tty" must be absent from
# the output.
def test_format_repeated_field_with_schema
  event_time = Fluent::EventTime.new(Time.now.to_i)
  command_line = ["tail", "-f", "/var/log/fluentd/fluentd.log"]

  record = {
    "tty" => nil,
    "pwd" => "/home/yugui",
    "user" => "fluentd",
    "argv" => command_line.dup
  }
  expected = {
    "time" => event_time.to_f,
    "pwd" => "/home/yugui",
    "user" => "fluentd",
    "argv" => command_line.dup
  }

  conf = <<-CONFIG
    table foo
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    <inject>
    time_format %s
    time_key time
    </inject>

    schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
    schema [{"name": "time", "type": "INTEGER"}]
  CONFIG
  driver = create_driver(conf)

  formatted = nil
  driver.run do
    formatted = driver.instance.format("my.tag", event_time, record)
  end

  assert_equal expected, MultiJson.load(formatted)
end
446
+
447
# With `fetch_schema true` the plugin must ask the writer for the schema of
# yourproject_id / yourdataset_id / foo, format the record with it, and cache
# the parsed schema in @fetched_schemas under
# "yourproject_id.yourdataset_id.foo".
def test_format_fetch_from_bigquery_api
  event_time = Fluent::EventTime.new(Time.now.to_i)
  command_line = ["tail", "-f", "/var/log/fluentd/fluentd.log"]

  record = {
    "tty" => nil,
    "pwd" => "/home/yugui",
    "user" => "fluentd",
    "argv" => command_line.dup
  }
  expected = {
    "time" => event_time.to_i,
    "pwd" => "/home/yugui",
    "user" => "fluentd",
    "argv" => command_line.dup
  }

  conf = <<-CONFIG
    table foo
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    <inject>
    time_format %s
    time_key time
    </inject>

    fetch_schema true
    schema [{"name": "time", "type": "INTEGER"}]
  CONFIG
  driver = create_driver(conf)

  stub_writer do |writer|
    mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
      sudo_schema_response["schema"]["fields"]
    end
  end

  formatted = nil
  driver.run do
    formatted = driver.instance.format("my.tag", event_time, record)
  end

  assert_equal expected, MultiJson.load(formatted)

  fetched = driver.instance.instance_variable_get(:@fetched_schemas)
  table_schema = fetched['yourproject_id.yourdataset_id.foo']

  # column name => [expected type, expected mode], checked in this order
  expected_columns = {
    "time" => [:timestamp, :required],
    "tty"  => [:string, :nullable],
    "pwd"  => [:string, :required],
    "user" => [:string, :required],
    "argv" => [:string, :repeated],
  }
  expected_columns.each do |name, (type, mode)|
    assert table_schema[name]
    assert_equal type, table_schema[name].type
    assert_equal mode, table_schema[name].mode
  end
end
510
+
511
# Same as the plain fetch_schema test, but the target table is a time
# placeholder (foo_%Y_%m_%d) and `fetch_schema_table foo` names the table
# whose schema is fetched; the schema must be requested for, and cached
# under, "foo".
def test_format_fetch_from_bigquery_api_with_fetch_schema_table
  event_time = Fluent::EventTime.new(Time.now.to_i)
  command_line = ["tail", "-f", "/var/log/fluentd/fluentd.log"]

  record = {
    "tty" => nil,
    "pwd" => "/home/yugui",
    "user" => "fluentd",
    "argv" => command_line.dup
  }
  expected = {
    "time" => event_time.to_i,
    "pwd" => "/home/yugui",
    "user" => "fluentd",
    "argv" => command_line.dup
  }

  conf = <<-CONFIG
    table foo_%Y_%m_%d
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    <inject>
    time_format %s
    time_key time
    </inject>

    fetch_schema true
    fetch_schema_table foo
    schema [{"name": "time", "type": "INTEGER"}]

    <buffer time>
    timekey 1d
    </buffer>
  CONFIG
  driver = create_driver(conf)

  stub_writer do |writer|
    mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
      sudo_schema_response["schema"]["fields"]
    end
  end

  formatted = nil
  driver.run do
    formatted = driver.instance.format("my.tag", event_time, record)
  end

  assert_equal expected, MultiJson.load(formatted)

  fetched = driver.instance.instance_variable_get(:@fetched_schemas)
  table_schema = fetched['yourproject_id.yourdataset_id.foo']

  # column name => [expected type, expected mode], checked in this order
  expected_columns = {
    "time" => [:timestamp, :required],
    "tty"  => [:string, :nullable],
    "pwd"  => [:string, :required],
    "user" => [:string, :required],
    "argv" => [:string, :repeated],
  }
  expected_columns.each do |name, (type, mode)|
    assert table_schema[name]
    assert_equal type, table_schema[name].type
    assert_equal mode, table_schema[name].mode
  end
end
579
+ end