fluent-plugin-bigquery 1.2.0 → 2.0.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1276 +0,0 @@
1
- require 'helper'
2
-
3
- class BigQueryOutputTest < Test::Unit::TestCase
4
- def setup
5
- Fluent::Test.setup
6
- end
7
-
8
- CONFIG = %[
9
- table foo
10
- email foo@bar.example
11
- private_key_path /path/to/key
12
- project yourproject_id
13
- dataset yourdataset_id
14
-
15
- <inject>
16
- time_format %s
17
- time_key time
18
- </inject>
19
-
20
- schema [
21
- {"name": "time", "type": "INTEGER"},
22
- {"name": "status", "type": "INTEGER"},
23
- {"name": "bytes", "type": "INTEGER"},
24
- {"name": "vhost", "type": "STRING"},
25
- {"name": "path", "type": "STRING"},
26
- {"name": "method", "type": "STRING"},
27
- {"name": "protocol", "type": "STRING"},
28
- {"name": "agent", "type": "STRING"},
29
- {"name": "referer", "type": "STRING"},
30
- {"name": "remote", "type": "RECORD", "fields": [
31
- {"name": "host", "type": "STRING"},
32
- {"name": "ip", "type": "STRING"},
33
- {"name": "user", "type": "STRING"}
34
- ]},
35
- {"name": "requesttime", "type": "FLOAT"},
36
- {"name": "bot_access", "type": "BOOLEAN"},
37
- {"name": "loginsession", "type": "BOOLEAN"}
38
- ]
39
- ]
40
-
41
- API_SCOPE = "https://www.googleapis.com/auth/bigquery"
42
-
43
- def create_driver(conf = CONFIG)
44
- Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryOutput).configure(conf)
45
- end
46
-
47
- def stub_writer(driver, stub_auth: true)
48
- stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
49
- stub(writer).get_auth { nil } if stub_auth
50
- yield writer
51
- writer
52
- end
53
- end
54
-
55
- def test_configure_table
56
- driver = create_driver
57
- assert_equal driver.instance.table, 'foo'
58
- assert_nil driver.instance.tables
59
-
60
- driver = create_driver(CONFIG.sub(/\btable\s+.*$/, 'tables foo,bar'))
61
- assert_nil driver.instance.table
62
- assert_equal driver.instance.tables, ['foo' ,'bar']
63
-
64
- assert_raise(Fluent::ConfigError, "'table' or 'tables' must be specified, and both are invalid") {
65
- create_driver(CONFIG + "tables foo,bar")
66
- }
67
- end
68
-
69
- def test_configure_auth_private_key
70
- driver = create_driver
71
- stub_writer(driver, stub_auth: false) do |writer|
72
- mock(writer).get_auth_from_private_key { stub! }
73
- end
74
- assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
75
- end
76
-
77
- def test_configure_auth_compute_engine
78
- driver = create_driver(%[
79
- table foo
80
- auth_method compute_engine
81
- project yourproject_id
82
- dataset yourdataset_id
83
- schema [
84
- {"name": "time", "type": "INTEGER"},
85
- {"name": "status", "type": "INTEGER"},
86
- {"name": "bytes", "type": "INTEGER"}
87
- ]
88
- ])
89
-
90
- stub_writer(driver, stub_auth: false) do |writer|
91
- mock(writer).get_auth_from_compute_engine { stub! }
92
- end
93
- assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
94
- end
95
-
96
- def test_configure_auth_json_key_as_file
97
- driver = create_driver(%[
98
- table foo
99
- auth_method json_key
100
- json_key jsonkey.josn
101
- project yourproject_id
102
- dataset yourdataset_id
103
- schema [
104
- {"name": "time", "type": "INTEGER"},
105
- {"name": "status", "type": "INTEGER"},
106
- {"name": "bytes", "type": "INTEGER"}
107
- ]
108
- ])
109
-
110
- stub_writer(driver, stub_auth: false) do |writer|
111
- mock(writer).get_auth_from_json_key { stub! }
112
- end
113
- assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
114
- end
115
-
116
- def test_configure_auth_json_key_as_file_raise_permission_error
117
- json_key_path = 'test/plugin/testdata/json_key.json'
118
- json_key_path_dir = File.dirname(json_key_path)
119
-
120
- begin
121
- File.chmod(0000, json_key_path_dir)
122
-
123
- driver = create_driver(%[
124
- table foo
125
- auth_method json_key
126
- json_key #{json_key_path}
127
- project yourproject_id
128
- dataset yourdataset_id
129
- schema [
130
- {"name": "time", "type": "INTEGER"},
131
- {"name": "status", "type": "INTEGER"},
132
- {"name": "bytes", "type": "INTEGER"}
133
- ]
134
- ])
135
- assert_raises(Errno::EACCES) do
136
- driver.instance.writer.client
137
- end
138
- ensure
139
- File.chmod(0755, json_key_path_dir)
140
- end
141
- end
142
-
143
- def test_configure_auth_json_key_as_string
144
- json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
145
- json_key_io = StringIO.new(json_key)
146
- authorization = Object.new
147
- stub(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == JSON.parse(json_key_io.read) }, scope: API_SCOPE) { authorization }
148
-
149
- driver = create_driver(%[
150
- table foo
151
- auth_method json_key
152
- json_key #{json_key}
153
- project yourproject_id
154
- dataset yourdataset_id
155
- schema [
156
- {"name": "time", "type": "INTEGER"},
157
- {"name": "status", "type": "INTEGER"},
158
- {"name": "bytes", "type": "INTEGER"}
159
- ]
160
- ])
161
- stub_writer(driver, stub_auth: false) do |writer|
162
- mock.proxy(writer).get_auth_from_json_key { stub! }
163
- end
164
- assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
165
- end
166
-
167
- def test_configure_auth_application_default
168
- driver = create_driver(%[
169
- table foo
170
- auth_method application_default
171
- project yourproject_id
172
- dataset yourdataset_id
173
- schema [
174
- {"name": "time", "type": "INTEGER"},
175
- {"name": "status", "type": "INTEGER"},
176
- {"name": "bytes", "type": "INTEGER"}
177
- ]
178
- ])
179
-
180
- stub_writer(driver, stub_auth: false) do |writer|
181
- mock.proxy(writer).get_auth_from_application_default { stub! }
182
- end
183
- assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
184
- end
185
-
186
- def test_format
187
- now = Fluent::EventTime.new(Time.now.to_i)
188
- input = {
189
- "status" => "1",
190
- "bytes" => 3.0,
191
- "vhost" => :bar,
192
- "path" => "/path/to/baz",
193
- "method" => "GET",
194
- "protocol" => "HTTP/0.9",
195
- "agent" => "libwww",
196
- "referer" => "http://referer.example",
197
- "requesttime" => (now - 1).to_f.to_s,
198
- "bot_access" => true,
199
- "loginsession" => false,
200
- "something-else" => "would be ignored",
201
- "yet-another" => {
202
- "foo" => "bar",
203
- "baz" => 1,
204
- },
205
- "remote" => {
206
- "host" => "remote.example",
207
- "ip" => "192.0.2.1",
208
- "port" => 12345,
209
- "user" => "tagomoris",
210
- }
211
- }
212
- expected = {
213
- "time" => now.to_i,
214
- "status" => 1,
215
- "bytes" => 3,
216
- "vhost" => "bar",
217
- "path" => "/path/to/baz",
218
- "method" => "GET",
219
- "protocol" => "HTTP/0.9",
220
- "agent" => "libwww",
221
- "referer" => "http://referer.example",
222
- "requesttime" => (now - 1).to_f.to_s.to_f,
223
- "bot_access" => true,
224
- "loginsession" => false,
225
- "something-else" => "would be ignored",
226
- "yet-another" => {
227
- "foo" => "bar",
228
- "baz" => 1,
229
- },
230
- "remote" => {
231
- "host" => "remote.example",
232
- "ip" => "192.0.2.1",
233
- "port" => 12345,
234
- "user" => "tagomoris",
235
- }
236
- }
237
-
238
- driver = create_driver(CONFIG)
239
- buf = nil
240
- driver.run { buf = driver.instance.format("my.tag", now, input) }
241
-
242
- assert_equal expected, MultiJson.load(buf)
243
- end
244
-
245
- [
246
- # <time_format>, <time field type>, <time expectation generator>, <assertion>
247
- [
248
- "%s.%6N",
249
- lambda{|t| t.strftime("%s.%6N").to_f },
250
- lambda{|recv, expected, actual|
251
- recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
252
- }
253
- ],
254
- [
255
- "%Y-%m-%dT%H:%M:%S%:z",
256
- lambda{|t| t.iso8601 },
257
- :assert_equal.to_proc
258
- ],
259
- ].each do |format, expect_time, assert|
260
- define_method("test_time_formats_#{format}") do
261
- now = Fluent::Engine.now
262
- input = {}
263
- expected = { "time" => expect_time[Time.at(now.to_r)] }
264
-
265
- driver = create_driver(<<-CONFIG)
266
- table foo
267
- email foo@bar.example
268
- private_key_path /path/to/key
269
- project yourproject_id
270
- dataset yourdataset_id
271
-
272
- <inject>
273
- time_format #{format}
274
- time_type string
275
- time_key time
276
- </inject>
277
-
278
- schema [
279
- {"name": "metadata", "type": "RECORD", "fields": [
280
- {"name": "time", "type": "INTEGER"},
281
- {"name": "node", "type": "STRING"}
282
- ]},
283
- {"name": "log", "type": "STRING"}
284
- ]
285
- CONFIG
286
-
287
- buf = nil
288
- driver.run { buf = driver.instance.format("my.tag", now, input) }
289
-
290
- assert[self, expected["time"], MultiJson.load(buf)["time"]]
291
- end
292
- end
293
-
294
- def test_format_with_schema
295
- now = Fluent::EventTime.new(Time.now.to_i)
296
- input = {
297
- "request" => {
298
- "vhost" => :bar,
299
- "path" => "/path/to/baz",
300
- "method" => "GET",
301
- "protocol" => "HTTP/0.9",
302
- "agent" => "libwww",
303
- "referer" => "http://referer.example",
304
- "time" => (now - 1).to_f,
305
- "bot_access" => true,
306
- "loginsession" => false,
307
- },
308
- "response" => {
309
- "status" => "1",
310
- "bytes" => 3.0,
311
- },
312
- "remote" => {
313
- "host" => "remote.example",
314
- "ip" => "192.0.2.1",
315
- "port" => 12345,
316
- "user" => "tagomoris",
317
- },
318
- "something-else" => "would be ignored",
319
- "yet-another" => {
320
- "foo" => "bar",
321
- "baz" => 1,
322
- },
323
- }
324
- expected = {
325
- "time" => now.to_f,
326
- "request" => {
327
- "vhost" => "bar",
328
- "path" => "/path/to/baz",
329
- "method" => "GET",
330
- "protocol" => "HTTP/0.9",
331
- "agent" => "libwww",
332
- "referer" => "http://referer.example",
333
- "time" => (now - 1).to_f,
334
- "bot_access" => true,
335
- "loginsession" => false,
336
- },
337
- "remote" => {
338
- "host" => "remote.example",
339
- "ip" => "192.0.2.1",
340
- "port" => 12345,
341
- "user" => "tagomoris",
342
- },
343
- "response" => {
344
- "status" => 1,
345
- "bytes" => 3,
346
- },
347
- "something-else" => "would be ignored",
348
- "yet-another" => {
349
- "foo" => "bar",
350
- "baz" => 1,
351
- },
352
- }
353
-
354
- driver = create_driver(<<-CONFIG)
355
- table foo
356
- email foo@bar.example
357
- private_key_path /path/to/key
358
- project yourproject_id
359
- dataset yourdataset_id
360
-
361
- <inject>
362
- time_format %s
363
- time_key time
364
- </inject>
365
-
366
- schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
367
- schema [{"name": "time", "type": "INTEGER"}]
368
- CONFIG
369
-
370
- buf = nil
371
- driver.run { buf = driver.instance.format("my.tag", now, input) }
372
-
373
- assert_equal expected, MultiJson.load(buf)
374
- end
375
-
376
- def test_format_repeated_field_with_schema
377
- now = Fluent::EventTime.new(Time.now.to_i)
378
- input = {
379
- "tty" => nil,
380
- "pwd" => "/home/yugui",
381
- "user" => "fluentd",
382
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
383
- }
384
- expected = {
385
- "time" => now.to_f,
386
- "pwd" => "/home/yugui",
387
- "user" => "fluentd",
388
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
389
- }
390
-
391
- driver = create_driver(<<-CONFIG)
392
- table foo
393
- email foo@bar.example
394
- private_key_path /path/to/key
395
- project yourproject_id
396
- dataset yourdataset_id
397
-
398
- <inject>
399
- time_format %s
400
- time_key time
401
- </inject>
402
-
403
- schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
404
- schema [{"name": "time", "type": "INTEGER"}]
405
- CONFIG
406
-
407
- buf = nil
408
- driver.run { buf = driver.instance.format("my.tag", now, input) }
409
-
410
- assert_equal expected, MultiJson.load(buf)
411
- end
412
-
413
- def test_format_fetch_from_bigquery_api
414
- now = Fluent::EventTime.new(Time.now.to_i)
415
- input = {
416
- "tty" => nil,
417
- "pwd" => "/home/yugui",
418
- "user" => "fluentd",
419
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
420
- }
421
- expected = {
422
- "time" => now.to_i,
423
- "pwd" => "/home/yugui",
424
- "user" => "fluentd",
425
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
426
- }
427
-
428
- driver = create_driver(<<-CONFIG)
429
- table foo
430
- email foo@bar.example
431
- private_key_path /path/to/key
432
- project yourproject_id
433
- dataset yourdataset_id
434
-
435
- <inject>
436
- time_format %s
437
- time_key time
438
- </inject>
439
-
440
- fetch_schema true
441
- schema [{"name": "time", "type": "INTEGER"}]
442
- CONFIG
443
-
444
- stub_writer(driver) do |writer|
445
- mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
446
- sudo_schema_response["schema"]["fields"]
447
- end
448
- end
449
-
450
- buf = nil
451
- driver.run { buf = driver.instance.format("my.tag", now, input) }
452
-
453
- assert_equal expected, MultiJson.load(buf)
454
-
455
- table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
456
- assert table_schema["time"]
457
- assert_equal :timestamp, table_schema["time"].type
458
- assert_equal :required, table_schema["time"].mode
459
-
460
- assert table_schema["tty"]
461
- assert_equal :string, table_schema["tty"].type
462
- assert_equal :nullable, table_schema["tty"].mode
463
-
464
- assert table_schema["pwd"]
465
- assert_equal :string, table_schema["pwd"].type
466
- assert_equal :required, table_schema["pwd"].mode
467
-
468
- assert table_schema["user"]
469
- assert_equal :string, table_schema["user"].type
470
- assert_equal :required, table_schema["user"].mode
471
-
472
- assert table_schema["argv"]
473
- assert_equal :string, table_schema["argv"].type
474
- assert_equal :repeated, table_schema["argv"].mode
475
- end
476
-
477
- def test_format_fetch_from_bigquery_api_with_fetch_schema_table
478
- now = Fluent::EventTime.new(Time.now.to_i)
479
- input = {
480
- "tty" => nil,
481
- "pwd" => "/home/yugui",
482
- "user" => "fluentd",
483
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
484
- }
485
- expected = {
486
- "time" => now.to_i,
487
- "pwd" => "/home/yugui",
488
- "user" => "fluentd",
489
- "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
490
- }
491
-
492
- driver = create_driver(<<-CONFIG)
493
- table foo_%Y_%m_%d
494
- email foo@bar.example
495
- private_key_path /path/to/key
496
- project yourproject_id
497
- dataset yourdataset_id
498
-
499
- <inject>
500
- time_format %s
501
- time_key time
502
- </inject>
503
-
504
- fetch_schema true
505
- fetch_schema_table foo
506
- schema [{"name": "time", "type": "INTEGER"}]
507
-
508
- <buffer time>
509
- timekey 1d
510
- </buffer>
511
- CONFIG
512
-
513
- stub_writer(driver) do |writer|
514
- mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
515
- sudo_schema_response["schema"]["fields"]
516
- end
517
- end
518
-
519
- buf = nil
520
- driver.run { buf = driver.instance.format("my.tag", now, input) }
521
-
522
- assert_equal expected, MultiJson.load(buf)
523
-
524
- table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
525
- assert table_schema["time"]
526
- assert_equal :timestamp, table_schema["time"].type
527
- assert_equal :required, table_schema["time"].mode
528
-
529
- assert table_schema["tty"]
530
- assert_equal :string, table_schema["tty"].type
531
- assert_equal :nullable, table_schema["tty"].mode
532
-
533
- assert table_schema["pwd"]
534
- assert_equal :string, table_schema["pwd"].type
535
- assert_equal :required, table_schema["pwd"].mode
536
-
537
- assert table_schema["user"]
538
- assert_equal :string, table_schema["user"].type
539
- assert_equal :required, table_schema["user"].mode
540
-
541
- assert table_schema["argv"]
542
- assert_equal :string, table_schema["argv"].type
543
- assert_equal :repeated, table_schema["argv"].mode
544
- end
545
-
546
- def test__write_with_insert_id
547
- now = Time.now.to_i
548
- input = {
549
- "uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
550
- }
551
- expected = {
552
- insert_id: "9ABFF756-0267-4247-847F-0895B65F0938",
553
- json: {
554
- uuid: "9ABFF756-0267-4247-847F-0895B65F0938",
555
- }
556
- }
557
-
558
- driver = create_driver(<<-CONFIG)
559
- table foo
560
- email foo@bar.example
561
- private_key_path /path/to/key
562
- project yourproject_id
563
- dataset yourdataset_id
564
-
565
- insert_id_field uuid
566
- schema [{"name": "uuid", "type": "STRING"}]
567
- CONFIG
568
- mock(driver.instance).insert("yourproject_id", "yourdataset_id", "foo", [expected], instance_of(Fluent::BigQuery::RecordSchema), nil)
569
-
570
- driver.run do
571
- driver.feed('tag', now, input)
572
- end
573
- end
574
-
575
- def test__write_with_nested_insert_id
576
- input = {
577
- "data" => {
578
- "uuid" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
579
- },
580
- }
581
- expected = {
582
- insert_id: "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
583
- json: {
584
- data: {
585
- uuid: "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
586
- }
587
- }
588
- }
589
-
590
- driver = create_driver(<<-CONFIG)
591
- table foo
592
- email foo@bar.example
593
- private_key_path /path/to/key
594
- project yourproject_id
595
- dataset yourdataset_id
596
-
597
- insert_id_field data.uuid
598
- schema [{"name": "data", "type": "RECORD", "fields": [
599
- {"name": "uuid", "type": "STRING"}
600
- ]}]
601
- CONFIG
602
-
603
- mock(driver.instance).insert("yourproject_id", "yourdataset_id", "foo", [expected], instance_of(Fluent::BigQuery::RecordSchema), nil)
604
-
605
- driver.run do
606
- driver.feed('tag', Fluent::EventTime.now, input)
607
- end
608
- end
609
-
610
- def test_replace_record_key
611
- now = Fluent::EventTime.now
612
- input = {
613
- "vhost" => :bar,
614
- "@referer" => "http://referer.example",
615
- "bot_access" => true,
616
- "login-session" => false
617
- }
618
- expected = {
619
- "time" => now.to_i,
620
- "vhost" => "bar",
621
- "referer" => "http://referer.example",
622
- "bot_access" => true,
623
- "login_session" => false
624
- }
625
-
626
- driver = create_driver(<<-CONFIG)
627
- table foo
628
- email foo@bar.example
629
- private_key_path /path/to/key
630
- project yourproject_id
631
- dataset yourdataset_id
632
-
633
- replace_record_key true
634
- replace_record_key_regexp1 - _
635
-
636
- <inject>
637
- time_format %s
638
- time_key time
639
- </inject>
640
-
641
- schema [
642
- {"name": "time", "type": "INTEGER"},
643
- {"name": "vhost", "type": "STRING"},
644
- {"name": "refere", "type": "STRING"},
645
- {"name": "bot_access", "type": "BOOLEAN"},
646
- {"name": "login_session", "type": "BOOLEAN"}
647
- ]
648
- CONFIG
649
-
650
- buf = nil
651
- driver.run { buf = driver.instance.format("my.tag", now, input) }
652
-
653
- assert_equal expected, MultiJson.load(buf)
654
- end
655
-
656
- def test_write
657
- entry = {a: "b"}
658
- driver = create_driver
659
-
660
- stub_writer(driver) do |writer|
661
- mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: hash_including(entry)}], template_suffix: nil)
662
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
663
- rows: [{json: hash_including(entry)}],
664
- skip_invalid_rows: false,
665
- ignore_unknown_values: false
666
- }, {}) do
667
- s = stub!
668
- s.insert_errors { nil }
669
- s
670
- end
671
- end
672
-
673
- driver.run do
674
- driver.feed("tag", Time.now.to_i, {"a" => "b"})
675
- end
676
- end
677
-
678
- def test_write_with_retryable_error
679
- data_input = [
680
- { "status_code" => 500 },
681
- { "status_code" => 502 },
682
- { "status_code" => 503 },
683
- { "status_code" => 504 },
684
- ]
685
-
686
- data_input.each do |d|
687
- driver = create_driver(<<-CONFIG)
688
- table foo
689
- email foo@bar.example
690
- private_key_path /path/to/key
691
- project yourproject_id
692
- dataset yourdataset_id
693
-
694
- <inject>
695
- time_format %s
696
- time_key time
697
- </inject>
698
-
699
- schema [
700
- {"name": "time", "type": "INTEGER"},
701
- {"name": "status", "type": "INTEGER"},
702
- {"name": "bytes", "type": "INTEGER"},
703
- {"name": "vhost", "type": "STRING"},
704
- {"name": "path", "type": "STRING"},
705
- {"name": "method", "type": "STRING"},
706
- {"name": "protocol", "type": "STRING"},
707
- {"name": "agent", "type": "STRING"},
708
- {"name": "referer", "type": "STRING"},
709
- {"name": "remote", "type": "RECORD", "fields": [
710
- {"name": "host", "type": "STRING"},
711
- {"name": "ip", "type": "STRING"},
712
- {"name": "user", "type": "STRING"}
713
- ]},
714
- {"name": "requesttime", "type": "FLOAT"},
715
- {"name": "bot_access", "type": "BOOLEAN"},
716
- {"name": "loginsession", "type": "BOOLEAN"}
717
- ]
718
- <secondary>
719
- type file
720
- path error
721
- utc
722
- </secondary>
723
- CONFIG
724
-
725
- entry = {a: "b"}
726
- stub_writer(driver) do |writer|
727
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
728
- rows: [{json: hash_including(entry)}],
729
- skip_invalid_rows: false,
730
- ignore_unknown_values: false
731
- }, {}) do
732
- ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
733
- raise ex
734
- end
735
- end
736
-
737
- assert_raise(Fluent::BigQuery::RetryableError) do
738
- driver.run do
739
- driver.feed("tag", Time.now.to_i, {"a" => "b"})
740
- end
741
- end
742
- end
743
- end
744
-
745
- def test_write_with_not_retryable_error
746
- driver = create_driver(<<-CONFIG)
747
- table foo
748
- email foo@bar.example
749
- private_key_path /path/to/key
750
- project yourproject_id
751
- dataset yourdataset_id
752
-
753
- <inject>
754
- time_format %s
755
- time_key time
756
- </inject>
757
-
758
- schema [
759
- {"name": "time", "type": "INTEGER"},
760
- {"name": "status", "type": "INTEGER"},
761
- {"name": "bytes", "type": "INTEGER"},
762
- {"name": "vhost", "type": "STRING"},
763
- {"name": "path", "type": "STRING"},
764
- {"name": "method", "type": "STRING"},
765
- {"name": "protocol", "type": "STRING"},
766
- {"name": "agent", "type": "STRING"},
767
- {"name": "referer", "type": "STRING"},
768
- {"name": "remote", "type": "RECORD", "fields": [
769
- {"name": "host", "type": "STRING"},
770
- {"name": "ip", "type": "STRING"},
771
- {"name": "user", "type": "STRING"}
772
- ]},
773
- {"name": "requesttime", "type": "FLOAT"},
774
- {"name": "bot_access", "type": "BOOLEAN"},
775
- {"name": "loginsession", "type": "BOOLEAN"}
776
- ]
777
- <secondary>
778
- type file
779
- path error
780
- utc
781
- </secondary>
782
- CONFIG
783
-
784
- entry = {a: "b"}
785
- stub_writer(driver) do |writer|
786
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
787
- rows: [{json: hash_including(entry)}],
788
- skip_invalid_rows: false,
789
- ignore_unknown_values: false
790
- }, {}) do
791
- ex = Google::Apis::ServerError.new("error", status_code: 501)
792
- def ex.reason
793
- "invalid"
794
- end
795
- raise ex
796
- end
797
- end
798
-
799
- driver.instance_start
800
- tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
801
- metadata = driver.instance.metadata_for_test(tag, time, record)
802
- chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
803
- c.append([driver.instance.format(tag, time, record)])
804
- end
805
- assert_raise Fluent::BigQuery::UnRetryableError do
806
- driver.instance.write(chunk)
807
- end
808
- assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.1
809
- driver.instance_shutdown
810
- end
811
-
812
- def test_write_for_load
813
- schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
814
- driver = create_driver(<<-CONFIG)
815
- method load
816
- table foo
817
- email foo@bar.example
818
- private_key_path /path/to/key
819
- project yourproject_id
820
- dataset yourdataset_id
821
-
822
- <inject>
823
- time_format %s
824
- time_key time
825
- </inject>
826
-
827
- schema_path #{schema_path}
828
-
829
- buffer_type memory
830
- CONFIG
831
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(schema_path)))
832
-
833
- io = StringIO.new("hello")
834
- mock(driver.instance).create_upload_source(is_a(Fluent::Plugin::Buffer::Chunk)).yields(io)
835
- stub_writer(driver) do |writer|
836
- mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
837
- mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
838
-
839
- mock(writer.client).insert_job('yourproject_id', {
840
- configuration: {
841
- load: {
842
- destination_table: {
843
- project_id: 'yourproject_id',
844
- dataset_id: 'yourdataset_id',
845
- table_id: 'foo',
846
- },
847
- schema: {
848
- fields: schema_fields,
849
- },
850
- write_disposition: "WRITE_APPEND",
851
- source_format: "NEWLINE_DELIMITED_JSON",
852
- ignore_unknown_values: false,
853
- max_bad_records: 0,
854
- }
855
- }
856
- }, {upload_source: io, content_type: "application/octet-stream"}) do
857
- s = stub!
858
- job_reference_stub = stub!
859
- s.job_reference { job_reference_stub }
860
- job_reference_stub.job_id { "dummy_job_id" }
861
- s
862
- end
863
- end
864
-
865
- driver.run do
866
- driver.feed("tag", Time.now.to_i, {"a" => "b"})
867
- end
868
- end
869
-
870
- def test_write_for_load_with_prevent_duplicate_load
871
- schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
872
- driver = create_driver(<<-CONFIG)
873
- method load
874
- table foo
875
- email foo@bar.example
876
- private_key_path /path/to/key
877
- project yourproject_id
878
- dataset yourdataset_id
879
-
880
- <inject>
881
- time_format %s
882
- time_key time
883
- </inject>
884
-
885
- schema_path #{schema_path}
886
- prevent_duplicate_load true
887
-
888
- buffer_type memory
889
- CONFIG
890
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(schema_path)))
891
-
892
- io = StringIO.new("hello")
893
- mock(driver.instance).create_upload_source(is_a(Fluent::Plugin::Buffer::Chunk)).yields(io)
894
- stub_writer(driver) do |writer|
895
- mock(writer).wait_load_job(is_a(String), "yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
896
- mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
897
-
898
- mock(writer.client).insert_job('yourproject_id', {
899
- configuration: {
900
- load: {
901
- destination_table: {
902
- project_id: 'yourproject_id',
903
- dataset_id: 'yourdataset_id',
904
- table_id: 'foo',
905
- },
906
- schema: {
907
- fields: schema_fields,
908
- },
909
- write_disposition: "WRITE_APPEND",
910
- source_format: "NEWLINE_DELIMITED_JSON",
911
- ignore_unknown_values: false,
912
- max_bad_records: 0,
913
- },
914
- },
915
- job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/}} ,
916
- }, {upload_source: io, content_type: "application/octet-stream"}) do
917
- s = stub!
918
- job_reference_stub = stub!
919
- s.job_reference { job_reference_stub }
920
- job_reference_stub.job_id { "dummy_job_id" }
921
- s
922
- end
923
- end
924
-
925
- driver.run do
926
- driver.feed("tag", Time.now.to_i, {"a" => "b"})
927
- end
928
- end
929
-
930
- def test_write_for_load_with_retryable_error
931
- schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
932
- driver = create_driver(<<-CONFIG)
933
- method load
934
- table foo
935
- email foo@bar.example
936
- private_key_path /path/to/key
937
- project yourproject-id
938
- dataset yourdataset_id
939
-
940
- <inject>
941
- time_format %s
942
- time_key time
943
- </inject>
944
-
945
- schema_path #{schema_path}
946
-
947
- buffer_type memory
948
- CONFIG
949
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(schema_path)))
950
-
951
- driver.instance_start
952
- tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
953
- metadata = driver.instance.metadata_for_test(tag, time, record)
954
- chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
955
- c.append([driver.instance.format(tag, time, record)])
956
- end
957
-
958
- io = StringIO.new("hello")
959
- mock(driver.instance).create_upload_source(chunk).yields(io)
960
-
961
- stub_writer(driver) do |writer|
962
- mock(writer.client).get_table('yourproject-id', 'yourdataset_id', 'foo') { nil }
963
-
964
- mock(writer.client).insert_job('yourproject-id', {
965
- configuration: {
966
- load: {
967
- destination_table: {
968
- project_id: 'yourproject-id',
969
- dataset_id: 'yourdataset_id',
970
- table_id: 'foo',
971
- },
972
- schema: {
973
- fields: schema_fields,
974
- },
975
- write_disposition: "WRITE_APPEND",
976
- source_format: "NEWLINE_DELIMITED_JSON",
977
- ignore_unknown_values: false,
978
- max_bad_records: 0,
979
- }
980
- }
981
- }, {upload_source: io, content_type: "application/octet-stream"}) do
982
- s = stub!
983
- job_reference_stub = stub!
984
- s.job_reference { job_reference_stub }
985
- job_reference_stub.job_id { "dummy_job_id" }
986
- s
987
- end
988
-
989
- mock(writer.client).get_job('yourproject-id', 'dummy_job_id') do
990
- s = stub!
991
- status_stub = stub!
992
- error_result = stub!
993
-
994
- s.status { status_stub }
995
- status_stub.state { "DONE" }
996
- status_stub.error_result { error_result }
997
- status_stub.errors { nil }
998
- error_result.message { "error" }
999
- error_result.reason { "backendError" }
1000
- s
1001
- end
1002
- end
1003
-
1004
- assert_raise Fluent::BigQuery::RetryableError do
1005
- driver.instance.write(chunk)
1006
- end
1007
- driver.instance_shutdown
1008
- end
1009
-
1010
- def test_write_for_load_with_not_retryable_error
1011
- schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
1012
- driver = create_driver(<<-CONFIG)
1013
- method load
1014
- table foo
1015
- email foo@bar.example
1016
- private_key_path /path/to/key
1017
- project yourproject_id
1018
- dataset yourdataset_id
1019
-
1020
- <inject>
1021
- time_format %s
1022
- time_key time
1023
- </inject>
1024
-
1025
- schema_path #{schema_path}
1026
-
1027
- buffer_type memory
1028
- <secondary>
1029
- type file
1030
- path error
1031
- utc
1032
- </secondary>
1033
- CONFIG
1034
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(schema_path)))
1035
-
1036
- driver.instance_start
1037
- tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
1038
- metadata = driver.instance.metadata_for_test(tag, time, record)
1039
- chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
1040
- c.append([driver.instance.format(tag, time, record)])
1041
- end
1042
-
1043
- io = StringIO.new("hello")
1044
- mock(driver.instance).create_upload_source(chunk).yields(io)
1045
- stub_writer(driver) do |writer|
1046
- mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
1047
-
1048
- mock(writer.client).insert_job('yourproject_id', {
1049
- configuration: {
1050
- load: {
1051
- destination_table: {
1052
- project_id: 'yourproject_id',
1053
- dataset_id: 'yourdataset_id',
1054
- table_id: 'foo',
1055
- },
1056
- schema: {
1057
- fields: schema_fields,
1058
- },
1059
- write_disposition: "WRITE_APPEND",
1060
- source_format: "NEWLINE_DELIMITED_JSON",
1061
- ignore_unknown_values: false,
1062
- max_bad_records: 0,
1063
- }
1064
- }
1065
- }, {upload_source: io, content_type: "application/octet-stream"}) do
1066
- s = stub!
1067
- job_reference_stub = stub!
1068
- s.job_reference { job_reference_stub }
1069
- job_reference_stub.job_id { "dummy_job_id" }
1070
- s
1071
- end
1072
-
1073
- mock(writer.client).get_job('yourproject_id', 'dummy_job_id') do
1074
- s = stub!
1075
- status_stub = stub!
1076
- error_result = stub!
1077
-
1078
- s.status { status_stub }
1079
- status_stub.state { "DONE" }
1080
- status_stub.error_result { error_result }
1081
- status_stub.errors { nil }
1082
- error_result.message { "error" }
1083
- error_result.reason { "invalid" }
1084
- s
1085
- end
1086
- end
1087
-
1088
- assert_raise Fluent::BigQuery::UnRetryableError do
1089
- driver.instance.write(chunk)
1090
- end
1091
- assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.1
1092
- driver.instance_shutdown
1093
- end
1094
-
1095
- def test_write_with_row_based_table_id_formatting
1096
- entry = [
1097
- {json: {a: "b", created_at: Time.local(2014,8,20,9,0,0).strftime("%Y_%m_%d")}},
1098
- ]
1099
- driver = create_driver(<<-CONFIG)
1100
- <buffer created_at>
1101
- </buffer>
1102
- table foo_${created_at}
1103
- email foo@bar.example
1104
- private_key_path /path/to/key
1105
- project yourproject_id
1106
- dataset yourdataset_id
1107
-
1108
- schema [
1109
- {"name": "time", "type": "INTEGER"}
1110
- ]
1111
- CONFIG
1112
-
1113
- stub_writer(driver) do |writer|
1114
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
1115
- rows: [entry[0]],
1116
- skip_invalid_rows: false,
1117
- ignore_unknown_values: false
1118
- }, {}) { stub!.insert_errors { nil } }
1119
- end
1120
-
1121
- driver.run do
1122
- driver.feed("tag", Time.now.to_i, {"a" => "b", "created_at" => Time.local(2014,8,20,9,0,0).strftime("%Y_%m_%d")})
1123
- end
1124
- end
1125
-
1126
- def test_auto_create_table_by_bigquery_api
1127
- now = Time.at(Time.now.to_i)
1128
- message = {
1129
- "time" => now.to_i,
1130
- "request" => {
1131
- "vhost" => "bar",
1132
- "path" => "/path/to/baz",
1133
- "method" => "GET",
1134
- "protocol" => "HTTP/1.0",
1135
- "agent" => "libwww",
1136
- "referer" => "http://referer.example",
1137
- "time" => (now - 1).to_f,
1138
- "bot_access" => true,
1139
- "loginsession" => false,
1140
- },
1141
- "remote" => {
1142
- "host" => "remote.example",
1143
- "ip" => "192.168.1.1",
1144
- "user" => "nagachika",
1145
- },
1146
- "response" => {
1147
- "status" => 200,
1148
- "bytes" => 72,
1149
- },
1150
- }
1151
-
1152
- driver = create_driver(<<-CONFIG)
1153
- table foo
1154
- email foo@bar.example
1155
- private_key_path /path/to/key
1156
- project yourproject_id
1157
- dataset yourdataset_id
1158
-
1159
- <inject>
1160
- time_format %s
1161
- time_key time
1162
- </inject>
1163
-
1164
- auto_create_table true
1165
- schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
1166
- CONFIG
1167
-
1168
- stub_writer(driver) do |writer|
1169
- mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}], template_suffix: nil) do
1170
- raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
1171
- end
1172
- mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
1173
- end
1174
-
1175
- assert_raise(RuntimeError) do
1176
- driver.run do
1177
- driver.feed("tag", Fluent::EventTime.from_time(now), message)
1178
- end
1179
- end
1180
- end
1181
-
1182
- def test_auto_create_partitioned_table_by_bigquery_api
1183
- now = Time.now
1184
- message = {
1185
- json: {
1186
- time: now.to_i,
1187
- request: {
1188
- vhost: "bar",
1189
- path: "/path/to/baz",
1190
- method: "GET",
1191
- protocol: "HTTP/1.0",
1192
- agent: "libwww",
1193
- referer: "http://referer.example",
1194
- time: (now - 1).to_f,
1195
- bot_access: true,
1196
- loginsession: false,
1197
- },
1198
- remote: {
1199
- host: "remote.example",
1200
- ip: "192.168.1.1",
1201
- user: "nagachika",
1202
- },
1203
- response: {
1204
- status: 200,
1205
- bytes: 72,
1206
- },
1207
- }
1208
- }
1209
-
1210
- driver = create_driver(<<-CONFIG)
1211
- table foo
1212
- email foo@bar.example
1213
- private_key_path /path/to/key
1214
- project yourproject_id
1215
- dataset yourdataset_id
1216
-
1217
- time_format %s
1218
- time_field time
1219
-
1220
- auto_create_table true
1221
- schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
1222
-
1223
- time_partitioning_type day
1224
- time_partitioning_expiration 1h
1225
- CONFIG
1226
-
1227
- stub_writer(driver) do |writer|
1228
- mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) do
1229
- raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
1230
- end
1231
- mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
1232
- end
1233
-
1234
- assert_raise(RuntimeError) do
1235
- driver.run do
1236
- driver.feed("tag", Fluent::EventTime.now, message[:json])
1237
- end
1238
- end
1239
- end
1240
-
1241
- private
1242
-
1243
- def sudo_schema_response
1244
- {
1245
- "schema" => {
1246
- "fields" => [
1247
- {
1248
- "name" => "time",
1249
- "type" => "TIMESTAMP",
1250
- "mode" => "REQUIRED"
1251
- },
1252
- {
1253
- "name" => "tty",
1254
- "type" => "STRING",
1255
- "mode" => "NULLABLE"
1256
- },
1257
- {
1258
- "name" => "pwd",
1259
- "type" => "STRING",
1260
- "mode" => "REQUIRED"
1261
- },
1262
- {
1263
- "name" => "user",
1264
- "type" => "STRING",
1265
- "mode" => "REQUIRED"
1266
- },
1267
- {
1268
- "name" => "argv",
1269
- "type" => "STRING",
1270
- "mode" => "REPEATED"
1271
- }
1272
- ]
1273
- }
1274
- }
1275
- end
1276
- end