fluent-plugin-bigquery 1.2.0 → 2.0.0.beta

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,1276 +0,0 @@
1
- require 'helper'
2
-
3
- class BigQueryOutputTest < Test::Unit::TestCase
4
- def setup
5
- Fluent::Test.setup
6
- end
7
-
8
- CONFIG = %[
9
- table foo
10
- email foo@bar.example
11
- private_key_path /path/to/key
12
- project yourproject_id
13
- dataset yourdataset_id
14
-
15
- <inject>
16
- time_format %s
17
- time_key time
18
- </inject>
19
-
20
- schema [
21
- {"name": "time", "type": "INTEGER"},
22
- {"name": "status", "type": "INTEGER"},
23
- {"name": "bytes", "type": "INTEGER"},
24
- {"name": "vhost", "type": "STRING"},
25
- {"name": "path", "type": "STRING"},
26
- {"name": "method", "type": "STRING"},
27
- {"name": "protocol", "type": "STRING"},
28
- {"name": "agent", "type": "STRING"},
29
- {"name": "referer", "type": "STRING"},
30
- {"name": "remote", "type": "RECORD", "fields": [
31
- {"name": "host", "type": "STRING"},
32
- {"name": "ip", "type": "STRING"},
33
- {"name": "user", "type": "STRING"}
34
- ]},
35
- {"name": "requesttime", "type": "FLOAT"},
36
- {"name": "bot_access", "type": "BOOLEAN"},
37
- {"name": "loginsession", "type": "BOOLEAN"}
38
- ]
39
- ]
40
-
41
- API_SCOPE = "https://www.googleapis.com/auth/bigquery"
42
-
43
- def create_driver(conf = CONFIG)
44
- Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryOutput).configure(conf)
45
- end
46
-
47
- def stub_writer(driver, stub_auth: true)
48
- stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
49
- stub(writer).get_auth { nil } if stub_auth
50
- yield writer
51
- writer
52
- end
53
- end
54
-
55
- def test_configure_table
56
- driver = create_driver
57
- assert_equal driver.instance.table, 'foo'
58
- assert_nil driver.instance.tables
59
-
60
- driver = create_driver(CONFIG.sub(/\btable\s+.*$/, 'tables foo,bar'))
61
- assert_nil driver.instance.table
62
- assert_equal driver.instance.tables, ['foo' ,'bar']
63
-
64
- assert_raise(Fluent::ConfigError, "'table' or 'tables' must be specified, and both are invalid") {
65
- create_driver(CONFIG + "tables foo,bar")
66
- }
67
- end
68
-
69
- def test_configure_auth_private_key
70
- driver = create_driver
71
- stub_writer(driver, stub_auth: false) do |writer|
72
- mock(writer).get_auth_from_private_key { stub! }
73
- end
74
- assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
75
- end
76
-
77
- def test_configure_auth_compute_engine
78
- driver = create_driver(%[
79
- table foo
80
- auth_method compute_engine
81
- project yourproject_id
82
- dataset yourdataset_id
83
- schema [
84
- {"name": "time", "type": "INTEGER"},
85
- {"name": "status", "type": "INTEGER"},
86
- {"name": "bytes", "type": "INTEGER"}
87
- ]
88
- ])
89
-
90
- stub_writer(driver, stub_auth: false) do |writer|
91
- mock(writer).get_auth_from_compute_engine { stub! }
92
- end
93
- assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
94
- end
95
-
96
- def test_configure_auth_json_key_as_file
97
- driver = create_driver(%[
98
- table foo
99
- auth_method json_key
100
- json_key jsonkey.josn
101
- project yourproject_id
102
- dataset yourdataset_id
103
- schema [
104
- {"name": "time", "type": "INTEGER"},
105
- {"name": "status", "type": "INTEGER"},
106
- {"name": "bytes", "type": "INTEGER"}
107
- ]
108
- ])
109
-
110
- stub_writer(driver, stub_auth: false) do |writer|
111
- mock(writer).get_auth_from_json_key { stub! }
112
- end
113
- assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
114
- end
115
-
116
- def test_configure_auth_json_key_as_file_raise_permission_error
117
- json_key_path = 'test/plugin/testdata/json_key.json'
118
- json_key_path_dir = File.dirname(json_key_path)
119
-
120
- begin
121
- File.chmod(0000, json_key_path_dir)
122
-
123
- driver = create_driver(%[
124
- table foo
125
- auth_method json_key
126
- json_key #{json_key_path}
127
- project yourproject_id
128
- dataset yourdataset_id
129
- schema [
130
- {"name": "time", "type": "INTEGER"},
131
- {"name": "status", "type": "INTEGER"},
132
- {"name": "bytes", "type": "INTEGER"}
133
- ]
134
- ])
135
- assert_raises(Errno::EACCES) do
136
- driver.instance.writer.client
137
- end
138
- ensure
139
- File.chmod(0755, json_key_path_dir)
140
- end
141
- end
142
-
143
- def test_configure_auth_json_key_as_string
144
- json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
145
- json_key_io = StringIO.new(json_key)
146
- authorization = Object.new
147
- stub(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == JSON.parse(json_key_io.read) }, scope: API_SCOPE) { authorization }
148
-
149
- driver = create_driver(%[
150
- table foo
151
- auth_method json_key
152
- json_key #{json_key}
153
- project yourproject_id
154
- dataset yourdataset_id
155
- schema [
156
- {"name": "time", "type": "INTEGER"},
157
- {"name": "status", "type": "INTEGER"},
158
- {"name": "bytes", "type": "INTEGER"}
159
- ]
160
- ])
161
- stub_writer(driver, stub_auth: false) do |writer|
162
- mock.proxy(writer).get_auth_from_json_key { stub! }
163
- end
164
- assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
165
- end
166
-
167
- def test_configure_auth_application_default
168
- driver = create_driver(%[
169
- table foo
170
- auth_method application_default
171
- project yourproject_id
172
- dataset yourdataset_id
173
- schema [
174
- {"name": "time", "type": "INTEGER"},
175
- {"name": "status", "type": "INTEGER"},
176
- {"name": "bytes", "type": "INTEGER"}
177
- ]
178
- ])
179
-
180
- stub_writer(driver, stub_auth: false) do |writer|
181
- mock.proxy(writer).get_auth_from_application_default { stub! }
182
- end
183
- assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
184
- end
185
-
186
- def test_format
187
- now = Fluent::EventTime.new(Time.now.to_i)
188
- input = {
189
- "status" => "1",
190
- "bytes" => 3.0,
191
- "vhost" => :bar,
192
- "path" => "/path/to/baz",
193
- "method" => "GET",
194
- "protocol" => "HTTP/0.9",
195
- "agent" => "libwww",
196
- "referer" => "http://referer.example",
197
- "requesttime" => (now - 1).to_f.to_s,
198
- "bot_access" => true,
199
- "loginsession" => false,
200
- "something-else" => "would be ignored",
201
- "yet-another" => {
202
- "foo" => "bar",
203
- "baz" => 1,
204
- },
205
- "remote" => {
206
- "host" => "remote.example",
207
- "ip" => "192.0.2.1",
208
- "port" => 12345,
209
- "user" => "tagomoris",
210
- }
211
- }
212
- expected = {
213
- "time" => now.to_i,
214
- "status" => 1,
215
- "bytes" => 3,
216
- "vhost" => "bar",
217
- "path" => "/path/to/baz",
218
- "method" => "GET",
219
- "protocol" => "HTTP/0.9",
220
- "agent" => "libwww",
221
- "referer" => "http://referer.example",
222
- "requesttime" => (now - 1).to_f.to_s.to_f,
223
- "bot_access" => true,
224
- "loginsession" => false,
225
- "something-else" => "would be ignored",
226
- "yet-another" => {
227
- "foo" => "bar",
228
- "baz" => 1,
229
- },
230
- "remote" => {
231
- "host" => "remote.example",
232
- "ip" => "192.0.2.1",
233
- "port" => 12345,
234
- "user" => "tagomoris",
235
- }
236
- }
237
-
238
- driver = create_driver(CONFIG)
239
- buf = nil
240
- driver.run { buf = driver.instance.format("my.tag", now, input) }
241
-
242
- assert_equal expected, MultiJson.load(buf)
243
- end
244
-
245
- [
246
- # <time_format>, <time field type>, <time expectation generator>, <assertion>
247
- [
248
- "%s.%6N",
249
- lambda{|t| t.strftime("%s.%6N").to_f },
250
- lambda{|recv, expected, actual|
251
- recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
252
- }
253
- ],
254
- [
255
- "%Y-%m-%dT%H:%M:%S%:z",
256
- lambda{|t| t.iso8601 },
257
- :assert_equal.to_proc
258
- ],
259
- ].each do |format, expect_time, assert|
260
- define_method("test_time_formats_#{format}") do
261
- now = Fluent::Engine.now
262
- input = {}
263
- expected = { "time" => expect_time[Time.at(now.to_r)] }
264
-
265
- driver = create_driver(<<-CONFIG)
266
- table foo
267
- email foo@bar.example
268
- private_key_path /path/to/key
269
- project yourproject_id
270
- dataset yourdataset_id
271
-
272
- <inject>
273
- time_format #{format}
274
- time_type string
275
- time_key time
276
- </inject>
277
-
278
- schema [
279
- {"name": "metadata", "type": "RECORD", "fields": [
280
- {"name": "time", "type": "INTEGER"},
281
- {"name": "node", "type": "STRING"}
282
- ]},
283
- {"name": "log", "type": "STRING"}
284
- ]
285
- CONFIG
286
-
287
- buf = nil
288
- driver.run { buf = driver.instance.format("my.tag", now, input) }
289
-
290
- assert[self, expected["time"], MultiJson.load(buf)["time"]]
291
- end
292
- end
293
-
294
- def test_format_with_schema
295
- now = Fluent::EventTime.new(Time.now.to_i)
296
- input = {
297
- "request" => {
298
- "vhost" => :bar,
299
- "path" => "/path/to/baz",
300
- "method" => "GET",
301
- "protocol" => "HTTP/0.9",
302
- "agent" => "libwww",
303
- "referer" => "http://referer.example",
304
- "time" => (now - 1).to_f,
305
- "bot_access" => true,
306
- "loginsession" => false,
307
- },
308
- "response" => {
309
- "status" => "1",
310
- "bytes" => 3.0,
311
- },
312
- "remote" => {
313
- "host" => "remote.example",
314
- "ip" => "192.0.2.1",
315
- "port" => 12345,
316
- "user" => "tagomoris",
317
- },
318
- "something-else" => "would be ignored",
319
- "yet-another" => {
320
- "foo" => "bar",
321
- "baz" => 1,
322
- },
323
- }
324
- expected = {
325
- "time" => now.to_f,
326
- "request" => {
327
- "vhost" => "bar",
328
- "path" => "/path/to/baz",
329
- "method" => "GET",
330
- "protocol" => "HTTP/0.9",
331
- "agent" => "libwww",
332
- "referer" => "http://referer.example",
333
- "time" => (now - 1).to_f,
334
- "bot_access" => true,
335
- "loginsession" => false,
336
- },
337
- "remote" => {
338
- "host" => "remote.example",
339
- "ip" => "192.0.2.1",
340
- "port" => 12345,
341
- "user" => "tagomoris",
342
- },
343
- "response" => {
344
- "status" => 1,
345
- "bytes" => 3,
346
- },
347
- "something-else" => "would be ignored",
348
- "yet-another" => {
349
- "foo" => "bar",
350
- "baz" => 1,
351
- },
352
- }
353
-
354
- driver = create_driver(<<-CONFIG)
355
- table foo
356
- email foo@bar.example
357
- private_key_path /path/to/key
358
- project yourproject_id
359
- dataset yourdataset_id
360
-
361
- <inject>
362
- time_format %s
363
- time_key time
364
- </inject>
365
-
366
- schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
367
- schema [{"name": "time", "type": "INTEGER"}]
368
- CONFIG
369
-
370
- buf = nil
371
- driver.run { buf = driver.instance.format("my.tag", now, input) }
372
-
373
- assert_equal expected, MultiJson.load(buf)
374
- end
375
-
376
- def test_format_repeated_field_with_schema
377
- now = Fluent::EventTime.new(Time.now.to_i)
378
- input = {
379
- "tty" => nil,
380
- "pwd" => "/home/yugui",
381
- "user" => "fluentd",
382
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
383
- }
384
- expected = {
385
- "time" => now.to_f,
386
- "pwd" => "/home/yugui",
387
- "user" => "fluentd",
388
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
389
- }
390
-
391
- driver = create_driver(<<-CONFIG)
392
- table foo
393
- email foo@bar.example
394
- private_key_path /path/to/key
395
- project yourproject_id
396
- dataset yourdataset_id
397
-
398
- <inject>
399
- time_format %s
400
- time_key time
401
- </inject>
402
-
403
- schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
404
- schema [{"name": "time", "type": "INTEGER"}]
405
- CONFIG
406
-
407
- buf = nil
408
- driver.run { buf = driver.instance.format("my.tag", now, input) }
409
-
410
- assert_equal expected, MultiJson.load(buf)
411
- end
412
-
413
- def test_format_fetch_from_bigquery_api
414
- now = Fluent::EventTime.new(Time.now.to_i)
415
- input = {
416
- "tty" => nil,
417
- "pwd" => "/home/yugui",
418
- "user" => "fluentd",
419
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
420
- }
421
- expected = {
422
- "time" => now.to_i,
423
- "pwd" => "/home/yugui",
424
- "user" => "fluentd",
425
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
426
- }
427
-
428
- driver = create_driver(<<-CONFIG)
429
- table foo
430
- email foo@bar.example
431
- private_key_path /path/to/key
432
- project yourproject_id
433
- dataset yourdataset_id
434
-
435
- <inject>
436
- time_format %s
437
- time_key time
438
- </inject>
439
-
440
- fetch_schema true
441
- schema [{"name": "time", "type": "INTEGER"}]
442
- CONFIG
443
-
444
- stub_writer(driver) do |writer|
445
- mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
446
- sudo_schema_response["schema"]["fields"]
447
- end
448
- end
449
-
450
- buf = nil
451
- driver.run { buf = driver.instance.format("my.tag", now, input) }
452
-
453
- assert_equal expected, MultiJson.load(buf)
454
-
455
- table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
456
- assert table_schema["time"]
457
- assert_equal :timestamp, table_schema["time"].type
458
- assert_equal :required, table_schema["time"].mode
459
-
460
- assert table_schema["tty"]
461
- assert_equal :string, table_schema["tty"].type
462
- assert_equal :nullable, table_schema["tty"].mode
463
-
464
- assert table_schema["pwd"]
465
- assert_equal :string, table_schema["pwd"].type
466
- assert_equal :required, table_schema["pwd"].mode
467
-
468
- assert table_schema["user"]
469
- assert_equal :string, table_schema["user"].type
470
- assert_equal :required, table_schema["user"].mode
471
-
472
- assert table_schema["argv"]
473
- assert_equal :string, table_schema["argv"].type
474
- assert_equal :repeated, table_schema["argv"].mode
475
- end
476
-
477
- def test_format_fetch_from_bigquery_api_with_fetch_schema_table
478
- now = Fluent::EventTime.new(Time.now.to_i)
479
- input = {
480
- "tty" => nil,
481
- "pwd" => "/home/yugui",
482
- "user" => "fluentd",
483
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
484
- }
485
- expected = {
486
- "time" => now.to_i,
487
- "pwd" => "/home/yugui",
488
- "user" => "fluentd",
489
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
490
- }
491
-
492
- driver = create_driver(<<-CONFIG)
493
- table foo_%Y_%m_%d
494
- email foo@bar.example
495
- private_key_path /path/to/key
496
- project yourproject_id
497
- dataset yourdataset_id
498
-
499
- <inject>
500
- time_format %s
501
- time_key time
502
- </inject>
503
-
504
- fetch_schema true
505
- fetch_schema_table foo
506
- schema [{"name": "time", "type": "INTEGER"}]
507
-
508
- <buffer time>
509
- timekey 1d
510
- </buffer>
511
- CONFIG
512
-
513
- stub_writer(driver) do |writer|
514
- mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
515
- sudo_schema_response["schema"]["fields"]
516
- end
517
- end
518
-
519
- buf = nil
520
- driver.run { buf = driver.instance.format("my.tag", now, input) }
521
-
522
- assert_equal expected, MultiJson.load(buf)
523
-
524
- table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
525
- assert table_schema["time"]
526
- assert_equal :timestamp, table_schema["time"].type
527
- assert_equal :required, table_schema["time"].mode
528
-
529
- assert table_schema["tty"]
530
- assert_equal :string, table_schema["tty"].type
531
- assert_equal :nullable, table_schema["tty"].mode
532
-
533
- assert table_schema["pwd"]
534
- assert_equal :string, table_schema["pwd"].type
535
- assert_equal :required, table_schema["pwd"].mode
536
-
537
- assert table_schema["user"]
538
- assert_equal :string, table_schema["user"].type
539
- assert_equal :required, table_schema["user"].mode
540
-
541
- assert table_schema["argv"]
542
- assert_equal :string, table_schema["argv"].type
543
- assert_equal :repeated, table_schema["argv"].mode
544
- end
545
-
546
- def test__write_with_insert_id
547
- now = Time.now.to_i
548
- input = {
549
- "uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
550
- }
551
- expected = {
552
- insert_id: "9ABFF756-0267-4247-847F-0895B65F0938",
553
- json: {
554
- uuid: "9ABFF756-0267-4247-847F-0895B65F0938",
555
- }
556
- }
557
-
558
- driver = create_driver(<<-CONFIG)
559
- table foo
560
- email foo@bar.example
561
- private_key_path /path/to/key
562
- project yourproject_id
563
- dataset yourdataset_id
564
-
565
- insert_id_field uuid
566
- schema [{"name": "uuid", "type": "STRING"}]
567
- CONFIG
568
- mock(driver.instance).insert("yourproject_id", "yourdataset_id", "foo", [expected], instance_of(Fluent::BigQuery::RecordSchema), nil)
569
-
570
- driver.run do
571
- driver.feed('tag', now, input)
572
- end
573
- end
574
-
575
- def test__write_with_nested_insert_id
576
- input = {
577
- "data" => {
578
- "uuid" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
579
- },
580
- }
581
- expected = {
582
- insert_id: "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
583
- json: {
584
- data: {
585
- uuid: "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
586
- }
587
- }
588
- }
589
-
590
- driver = create_driver(<<-CONFIG)
591
- table foo
592
- email foo@bar.example
593
- private_key_path /path/to/key
594
- project yourproject_id
595
- dataset yourdataset_id
596
-
597
- insert_id_field data.uuid
598
- schema [{"name": "data", "type": "RECORD", "fields": [
599
- {"name": "uuid", "type": "STRING"}
600
- ]}]
601
- CONFIG
602
-
603
- mock(driver.instance).insert("yourproject_id", "yourdataset_id", "foo", [expected], instance_of(Fluent::BigQuery::RecordSchema), nil)
604
-
605
- driver.run do
606
- driver.feed('tag', Fluent::EventTime.now, input)
607
- end
608
- end
609
-
610
- def test_replace_record_key
611
- now = Fluent::EventTime.now
612
- input = {
613
- "vhost" => :bar,
614
- "@referer" => "http://referer.example",
615
- "bot_access" => true,
616
- "login-session" => false
617
- }
618
- expected = {
619
- "time" => now.to_i,
620
- "vhost" => "bar",
621
- "referer" => "http://referer.example",
622
- "bot_access" => true,
623
- "login_session" => false
624
- }
625
-
626
- driver = create_driver(<<-CONFIG)
627
- table foo
628
- email foo@bar.example
629
- private_key_path /path/to/key
630
- project yourproject_id
631
- dataset yourdataset_id
632
-
633
- replace_record_key true
634
- replace_record_key_regexp1 - _
635
-
636
- <inject>
637
- time_format %s
638
- time_key time
639
- </inject>
640
-
641
- schema [
642
- {"name": "time", "type": "INTEGER"},
643
- {"name": "vhost", "type": "STRING"},
644
- {"name": "refere", "type": "STRING"},
645
- {"name": "bot_access", "type": "BOOLEAN"},
646
- {"name": "login_session", "type": "BOOLEAN"}
647
- ]
648
- CONFIG
649
-
650
- buf = nil
651
- driver.run { buf = driver.instance.format("my.tag", now, input) }
652
-
653
- assert_equal expected, MultiJson.load(buf)
654
- end
655
-
656
- def test_write
657
- entry = {a: "b"}
658
- driver = create_driver
659
-
660
- stub_writer(driver) do |writer|
661
- mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: hash_including(entry)}], template_suffix: nil)
662
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
663
- rows: [{json: hash_including(entry)}],
664
- skip_invalid_rows: false,
665
- ignore_unknown_values: false
666
- }, {}) do
667
- s = stub!
668
- s.insert_errors { nil }
669
- s
670
- end
671
- end
672
-
673
- driver.run do
674
- driver.feed("tag", Time.now.to_i, {"a" => "b"})
675
- end
676
- end
677
-
678
- def test_write_with_retryable_error
679
- data_input = [
680
- { "status_code" => 500 },
681
- { "status_code" => 502 },
682
- { "status_code" => 503 },
683
- { "status_code" => 504 },
684
- ]
685
-
686
- data_input.each do |d|
687
- driver = create_driver(<<-CONFIG)
688
- table foo
689
- email foo@bar.example
690
- private_key_path /path/to/key
691
- project yourproject_id
692
- dataset yourdataset_id
693
-
694
- <inject>
695
- time_format %s
696
- time_key time
697
- </inject>
698
-
699
- schema [
700
- {"name": "time", "type": "INTEGER"},
701
- {"name": "status", "type": "INTEGER"},
702
- {"name": "bytes", "type": "INTEGER"},
703
- {"name": "vhost", "type": "STRING"},
704
- {"name": "path", "type": "STRING"},
705
- {"name": "method", "type": "STRING"},
706
- {"name": "protocol", "type": "STRING"},
707
- {"name": "agent", "type": "STRING"},
708
- {"name": "referer", "type": "STRING"},
709
- {"name": "remote", "type": "RECORD", "fields": [
710
- {"name": "host", "type": "STRING"},
711
- {"name": "ip", "type": "STRING"},
712
- {"name": "user", "type": "STRING"}
713
- ]},
714
- {"name": "requesttime", "type": "FLOAT"},
715
- {"name": "bot_access", "type": "BOOLEAN"},
716
- {"name": "loginsession", "type": "BOOLEAN"}
717
- ]
718
- <secondary>
719
- type file
720
- path error
721
- utc
722
- </secondary>
723
- CONFIG
724
-
725
- entry = {a: "b"}
726
- stub_writer(driver) do |writer|
727
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
728
- rows: [{json: hash_including(entry)}],
729
- skip_invalid_rows: false,
730
- ignore_unknown_values: false
731
- }, {}) do
732
- ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
733
- raise ex
734
- end
735
- end
736
-
737
- assert_raise(Fluent::BigQuery::RetryableError) do
738
- driver.run do
739
- driver.feed("tag", Time.now.to_i, {"a" => "b"})
740
- end
741
- end
742
- end
743
- end
744
-
745
- def test_write_with_not_retryable_error
746
- driver = create_driver(<<-CONFIG)
747
- table foo
748
- email foo@bar.example
749
- private_key_path /path/to/key
750
- project yourproject_id
751
- dataset yourdataset_id
752
-
753
- <inject>
754
- time_format %s
755
- time_key time
756
- </inject>
757
-
758
- schema [
759
- {"name": "time", "type": "INTEGER"},
760
- {"name": "status", "type": "INTEGER"},
761
- {"name": "bytes", "type": "INTEGER"},
762
- {"name": "vhost", "type": "STRING"},
763
- {"name": "path", "type": "STRING"},
764
- {"name": "method", "type": "STRING"},
765
- {"name": "protocol", "type": "STRING"},
766
- {"name": "agent", "type": "STRING"},
767
- {"name": "referer", "type": "STRING"},
768
- {"name": "remote", "type": "RECORD", "fields": [
769
- {"name": "host", "type": "STRING"},
770
- {"name": "ip", "type": "STRING"},
771
- {"name": "user", "type": "STRING"}
772
- ]},
773
- {"name": "requesttime", "type": "FLOAT"},
774
- {"name": "bot_access", "type": "BOOLEAN"},
775
- {"name": "loginsession", "type": "BOOLEAN"}
776
- ]
777
- <secondary>
778
- type file
779
- path error
780
- utc
781
- </secondary>
782
- CONFIG
783
-
784
- entry = {a: "b"}
785
- stub_writer(driver) do |writer|
786
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
787
- rows: [{json: hash_including(entry)}],
788
- skip_invalid_rows: false,
789
- ignore_unknown_values: false
790
- }, {}) do
791
- ex = Google::Apis::ServerError.new("error", status_code: 501)
792
- def ex.reason
793
- "invalid"
794
- end
795
- raise ex
796
- end
797
- end
798
-
799
- driver.instance_start
800
- tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
801
- metadata = driver.instance.metadata_for_test(tag, time, record)
802
- chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
803
- c.append([driver.instance.format(tag, time, record)])
804
- end
805
- assert_raise Fluent::BigQuery::UnRetryableError do
806
- driver.instance.write(chunk)
807
- end
808
- assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.1
809
- driver.instance_shutdown
810
- end
811
-
812
- def test_write_for_load
813
- schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
814
- driver = create_driver(<<-CONFIG)
815
- method load
816
- table foo
817
- email foo@bar.example
818
- private_key_path /path/to/key
819
- project yourproject_id
820
- dataset yourdataset_id
821
-
822
- <inject>
823
- time_format %s
824
- time_key time
825
- </inject>
826
-
827
- schema_path #{schema_path}
828
-
829
- buffer_type memory
830
- CONFIG
831
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(schema_path)))
832
-
833
- io = StringIO.new("hello")
834
- mock(driver.instance).create_upload_source(is_a(Fluent::Plugin::Buffer::Chunk)).yields(io)
835
- stub_writer(driver) do |writer|
836
- mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
837
- mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
838
-
839
- mock(writer.client).insert_job('yourproject_id', {
840
- configuration: {
841
- load: {
842
- destination_table: {
843
- project_id: 'yourproject_id',
844
- dataset_id: 'yourdataset_id',
845
- table_id: 'foo',
846
- },
847
- schema: {
848
- fields: schema_fields,
849
- },
850
- write_disposition: "WRITE_APPEND",
851
- source_format: "NEWLINE_DELIMITED_JSON",
852
- ignore_unknown_values: false,
853
- max_bad_records: 0,
854
- }
855
- }
856
- }, {upload_source: io, content_type: "application/octet-stream"}) do
857
- s = stub!
858
- job_reference_stub = stub!
859
- s.job_reference { job_reference_stub }
860
- job_reference_stub.job_id { "dummy_job_id" }
861
- s
862
- end
863
- end
864
-
865
- driver.run do
866
- driver.feed("tag", Time.now.to_i, {"a" => "b"})
867
- end
868
- end
869
-
870
- def test_write_for_load_with_prevent_duplicate_load
871
- schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
872
- driver = create_driver(<<-CONFIG)
873
- method load
874
- table foo
875
- email foo@bar.example
876
- private_key_path /path/to/key
877
- project yourproject_id
878
- dataset yourdataset_id
879
-
880
- <inject>
881
- time_format %s
882
- time_key time
883
- </inject>
884
-
885
- schema_path #{schema_path}
886
- prevent_duplicate_load true
887
-
888
- buffer_type memory
889
- CONFIG
890
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(schema_path)))
891
-
892
- io = StringIO.new("hello")
893
- mock(driver.instance).create_upload_source(is_a(Fluent::Plugin::Buffer::Chunk)).yields(io)
894
- stub_writer(driver) do |writer|
895
- mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
896
- mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
897
-
898
- mock(writer.client).insert_job('yourproject_id', {
899
- configuration: {
900
- load: {
901
- destination_table: {
902
- project_id: 'yourproject_id',
903
- dataset_id: 'yourdataset_id',
904
- table_id: 'foo',
905
- },
906
- schema: {
907
- fields: schema_fields,
908
- },
909
- write_disposition: "WRITE_APPEND",
910
- source_format: "NEWLINE_DELIMITED_JSON",
911
- ignore_unknown_values: false,
912
- max_bad_records: 0,
913
- },
914
- },
915
- job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/}} ,
916
- }, {upload_source: io, content_type: "application/octet-stream"}) do
917
- s = stub!
918
- job_reference_stub = stub!
919
- s.job_reference { job_reference_stub }
920
- job_reference_stub.job_id { "dummy_job_id" }
921
- s
922
- end
923
- end
924
-
925
- driver.run do
926
- driver.feed("tag", Time.now.to_i, {"a" => "b"})
927
- end
928
- end
929
-
930
- def test_write_for_load_with_retryable_error
931
- schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
932
- driver = create_driver(<<-CONFIG)
933
- method load
934
- table foo
935
- email foo@bar.example
936
- private_key_path /path/to/key
937
- project yourproject-id
938
- dataset yourdataset_id
939
-
940
- <inject>
941
- time_format %s
942
- time_key time
943
- </inject>
944
-
945
- schema_path #{schema_path}
946
-
947
- buffer_type memory
948
- CONFIG
949
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(schema_path)))
950
-
951
- driver.instance_start
952
- tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
953
- metadata = driver.instance.metadata_for_test(tag, time, record)
954
- chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
955
- c.append([driver.instance.format(tag, time, record)])
956
- end
957
-
958
- io = StringIO.new("hello")
959
- mock(driver.instance).create_upload_source(chunk).yields(io)
960
-
961
- stub_writer(driver) do |writer|
962
- mock(writer.client).get_table('yourproject-id', 'yourdataset_id', 'foo') { nil }
963
-
964
- mock(writer.client).insert_job('yourproject-id', {
965
- configuration: {
966
- load: {
967
- destination_table: {
968
- project_id: 'yourproject-id',
969
- dataset_id: 'yourdataset_id',
970
- table_id: 'foo',
971
- },
972
- schema: {
973
- fields: schema_fields,
974
- },
975
- write_disposition: "WRITE_APPEND",
976
- source_format: "NEWLINE_DELIMITED_JSON",
977
- ignore_unknown_values: false,
978
- max_bad_records: 0,
979
- }
980
- }
981
- }, {upload_source: io, content_type: "application/octet-stream"}) do
982
- s = stub!
983
- job_reference_stub = stub!
984
- s.job_reference { job_reference_stub }
985
- job_reference_stub.job_id { "dummy_job_id" }
986
- s
987
- end
988
-
989
- mock(writer.client).get_job('yourproject-id', 'dummy_job_id') do
990
- s = stub!
991
- status_stub = stub!
992
- error_result = stub!
993
-
994
- s.status { status_stub }
995
- status_stub.state { "DONE" }
996
- status_stub.error_result { error_result }
997
- status_stub.errors { nil }
998
- error_result.message { "error" }
999
- error_result.reason { "backendError" }
1000
- s
1001
- end
1002
- end
1003
-
1004
- assert_raise Fluent::BigQuery::RetryableError do
1005
- driver.instance.write(chunk)
1006
- end
1007
- driver.instance_shutdown
1008
- end
1009
-
1010
- def test_write_for_load_with_not_retryable_error
1011
- schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
1012
- driver = create_driver(<<-CONFIG)
1013
- method load
1014
- table foo
1015
- email foo@bar.example
1016
- private_key_path /path/to/key
1017
- project yourproject_id
1018
- dataset yourdataset_id
1019
-
1020
- <inject>
1021
- time_format %s
1022
- time_key time
1023
- </inject>
1024
-
1025
- schema_path #{schema_path}
1026
-
1027
- buffer_type memory
1028
- <secondary>
1029
- type file
1030
- path error
1031
- utc
1032
- </secondary>
1033
- CONFIG
1034
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(schema_path)))
1035
-
1036
- driver.instance_start
1037
- tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
1038
- metadata = driver.instance.metadata_for_test(tag, time, record)
1039
- chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
1040
- c.append([driver.instance.format(tag, time, record)])
1041
- end
1042
-
1043
- io = StringIO.new("hello")
1044
- mock(driver.instance).create_upload_source(chunk).yields(io)
1045
- stub_writer(driver) do |writer|
1046
- mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
1047
-
1048
- mock(writer.client).insert_job('yourproject_id', {
1049
- configuration: {
1050
- load: {
1051
- destination_table: {
1052
- project_id: 'yourproject_id',
1053
- dataset_id: 'yourdataset_id',
1054
- table_id: 'foo',
1055
- },
1056
- schema: {
1057
- fields: schema_fields,
1058
- },
1059
- write_disposition: "WRITE_APPEND",
1060
- source_format: "NEWLINE_DELIMITED_JSON",
1061
- ignore_unknown_values: false,
1062
- max_bad_records: 0,
1063
- }
1064
- }
1065
- }, {upload_source: io, content_type: "application/octet-stream"}) do
1066
- s = stub!
1067
- job_reference_stub = stub!
1068
- s.job_reference { job_reference_stub }
1069
- job_reference_stub.job_id { "dummy_job_id" }
1070
- s
1071
- end
1072
-
1073
- mock(writer.client).get_job('yourproject_id', 'dummy_job_id') do
1074
- s = stub!
1075
- status_stub = stub!
1076
- error_result = stub!
1077
-
1078
- s.status { status_stub }
1079
- status_stub.state { "DONE" }
1080
- status_stub.error_result { error_result }
1081
- status_stub.errors { nil }
1082
- error_result.message { "error" }
1083
- error_result.reason { "invalid" }
1084
- s
1085
- end
1086
- end
1087
-
1088
- assert_raise Fluent::BigQuery::UnRetryableError do
1089
- driver.instance.write(chunk)
1090
- end
1091
- assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.1
1092
- driver.instance_shutdown
1093
- end
1094
-
1095
# Checks that a `${created_at}` placeholder in the `table` setting is filled in
# from the record: `created_at` is declared as the buffer chunk key, and a
# record whose `created_at` is "2014_08_20" must be sent to the table
# `foo_2014_08_20` through `insert_all_table_data`.
def test_write_with_row_based_table_id_formatting
  # One formatted date, used both in the fed record and in the expected row.
  created_at = Time.local(2014, 8, 20, 9, 0, 0).strftime("%Y_%m_%d")
  entry = [
    {json: {a: "b", created_at: created_at}},
  ]
  driver = create_driver(<<-CONFIG)
    <buffer created_at>
    </buffer>
    table foo_${created_at}
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    schema [
      {"name": "time", "type": "INTEGER"}
    ]
  CONFIG

  stub_writer(driver) do |writer|
    # The expected table id carries the record's created_at value as its suffix.
    mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
      rows: [entry[0]],
      skip_invalid_rows: false,
      ignore_unknown_values: false
    }, {}) { stub!.insert_errors { nil } }
  end

  driver.run do
    driver.feed("tag", Time.now.to_i, {"a" => "b", "created_at" => created_at})
  end
end
1125
-
1126
# Checks the `auto_create_table true` path: `insert_rows` is mocked to raise a
# RetryableError wrapping a 404 "Not found: Table ..." ServerError, and the
# test expects `create_table` to be called with the plugin's `@table_schema`.
# The run as a whole is expected to raise RuntimeError.
# NOTE(review): presumably the error surfaces so the chunk is retried after the
# table is created -- confirm against the plugin implementation.
def test_auto_create_table_by_bigquery_api
  # Truncated to whole seconds before being used for the event time and record.
  now = Time.at(Time.now.to_i)
  message = {
    "time" => now.to_i,
    "request" => {
      "vhost" => "bar",
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/1.0",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "time" => (now - 1).to_f,
      "bot_access" => true,
      "loginsession" => false,
    },
    "remote" => {
      "host" => "remote.example",
      "ip" => "192.168.1.1",
      "user" => "nagachika",
    },
    "response" => {
      "status" => 200,
      "bytes" => 72,
    },
  }

  driver = create_driver(<<-CONFIG)
    table foo
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    <inject>
    time_format %s
    time_key time
    </inject>

    auto_create_table true
    schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
  CONFIG

  not_found = "Not found: Table yourproject_id:yourdataset_id.foo"
  stub_writer(driver) do |writer|
    # The writer is expected to receive the record with symbolized keys.
    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}], template_suffix: nil) do
      raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new(not_found, status_code: 404, body: not_found))
    end
    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
  end

  assert_raise(RuntimeError) do
    driver.run do
      driver.feed("tag", Fluent::EventTime.from_time(now), message)
    end
  end
end
1181
-
1182
# Same scenario as the auto-create-table test, but the config also sets
# `time_partitioning_type day` and `time_partitioning_expiration 1h`.
# `insert_rows` is mocked to raise a RetryableError wrapping a 404
# "Not found: Table ..." ServerError; the test expects `create_table` to be
# called with the plugin's `@table_schema` and the run to raise RuntimeError.
def test_auto_create_partitioned_table_by_bigquery_api
  now = Time.now
  # Already in the symbol-keyed `{json: ...}` row shape passed to insert_rows.
  message = {
    json: {
      time: now.to_i,
      request: {
        vhost: "bar",
        path: "/path/to/baz",
        method: "GET",
        protocol: "HTTP/1.0",
        agent: "libwww",
        referer: "http://referer.example",
        time: (now - 1).to_f,
        bot_access: true,
        loginsession: false,
      },
      remote: {
        host: "remote.example",
        ip: "192.168.1.1",
        user: "nagachika",
      },
      response: {
        status: 200,
        bytes: 72,
      },
    }
  }

  driver = create_driver(<<-CONFIG)
    table foo
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    time_format %s
    time_field time

    auto_create_table true
    schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}

    time_partitioning_type day
    time_partitioning_expiration 1h
  CONFIG

  not_found = "Not found: Table yourproject_id:yourdataset_id.foo"
  stub_writer(driver) do |writer|
    mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) do
      raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new(not_found, status_code: 404, body: not_found))
    end
    mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
  end

  assert_raise(RuntimeError) do
    driver.run do
      # Only the inner record is fed; the expectation above is the wrapped row.
      driver.feed("tag", Fluent::EventTime.now, message[:json])
    end
  end
end
1240
-
1241
- private
1242
-
1243
# Builds a fixture hash with a "schema" => "fields" list of five columns:
# time (TIMESTAMP, REQUIRED), tty (STRING, NULLABLE), pwd and user
# (STRING, REQUIRED) and argv (STRING, REPEATED).
# A new hash is built on every call.
# NOTE(review): presumably stands in for a table-schema API response in tests
# defined elsewhere in this file -- confirm at the call sites.
#
# @return [Hash] string-keyed schema fixture
def sudo_schema_response
  {
    "schema" => {
      "fields" => [
        {
          "name" => "time",
          "type" => "TIMESTAMP",
          "mode" => "REQUIRED"
        },
        {
          "name" => "tty",
          "type" => "STRING",
          "mode" => "NULLABLE"
        },
        {
          "name" => "pwd",
          "type" => "STRING",
          "mode" => "REQUIRED"
        },
        {
          "name" => "user",
          "type" => "STRING",
          "mode" => "REQUIRED"
        },
        {
          "name" => "argv",
          "type" => "STRING",
          "mode" => "REPEATED"
        }
      ]
    }
  }
end
1276
- end