fluent-plugin-bigquery-test 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,544 @@
1
+ require 'helper'
2
+
3
# Tests for Fluent::Plugin::BigQueryInsertOutput.
#
# The BigQuery writer/client is never contacted: each test installs RR
# doubles (via #stub_writer or directly on the plugin instance) and asserts
# on the arguments the plugin passes to them.
class BigQueryInsertOutputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  # Default plugin configuration used by #create_driver.
  CONFIG = %[
    table foo
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    <inject>
      time_format %s
      time_key time
    </inject>

    schema [
      {"name": "time", "type": "INTEGER"},
      {"name": "status", "type": "INTEGER"},
      {"name": "bytes", "type": "INTEGER"},
      {"name": "vhost", "type": "STRING"},
      {"name": "path", "type": "STRING"},
      {"name": "method", "type": "STRING"},
      {"name": "protocol", "type": "STRING"},
      {"name": "agent", "type": "STRING"},
      {"name": "referer", "type": "STRING"},
      {"name": "remote", "type": "RECORD", "fields": [
        {"name": "host", "type": "STRING"},
        {"name": "ip", "type": "STRING"},
        {"name": "user", "type": "STRING"}
      ]},
      {"name": "requesttime", "type": "FLOAT"},
      {"name": "bot_access", "type": "BOOLEAN"},
      {"name": "loginsession", "type": "BOOLEAN"}
    ]
  ]

  # CONFIG followed by a file <secondary> section; shared by the two
  # error-handling tests so the schema is not copy-pasted into each of them.
  CONFIG_WITH_SECONDARY = CONFIG + %[
    <secondary>
      type file
      path error
      utc
    </secondary>
  ]

  API_SCOPE = "https://www.googleapis.com/auth/bigquery"

  # Builds an output test driver for the plugin, configured with +conf+
  # (defaults to CONFIG).
  def create_driver(conf = CONFIG)
    Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryInsertOutput).configure(conf)
  end

  # Proxies Fluent::BigQuery::Writer.new so that every writer created while
  # the test runs is yielded to the caller's block, where expectations can be
  # set on it. With +stub_auth+ true (the default) the writer's #get_auth is
  # stubbed to return nil.
  def stub_writer(stub_auth: true)
    stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
      stub(writer).get_auth { nil } if stub_auth
      yield writer
      writer
    end
  end

  # insert_id_field naming a top-level key: the row handed to #insert must
  # carry that key's value as :insert_id.
  def test__write_with_insert_id
    now = Time.now.to_i
    uuid = "9ABFF756-0267-4247-847F-0895B65F0938"
    input = {
      "uuid" => uuid,
    }
    expected = {
      insert_id: uuid,
      json: {
        uuid: uuid,
      }
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      insert_id_field uuid
      schema [{"name": "uuid", "type": "STRING"}]
    CONFIG
    mock(driver.instance).insert("yourproject_id", "yourdataset_id", "foo", [expected], instance_of(Fluent::BigQuery::RecordSchema), nil)

    driver.run do
      driver.feed('tag', now, input)
    end
  end

  # insert_id_field given as "$.data.uuid": the :insert_id must be taken from
  # the nested "data" => "uuid" value.
  def test__write_with_nested_insert_id
    uuid = "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A"
    input = {
      "data" => {
        "uuid" => uuid,
      },
    }
    expected = {
      insert_id: uuid,
      json: {
        data: {
          uuid: uuid,
        }
      }
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      insert_id_field $.data.uuid
      schema [{"name": "data", "type": "RECORD", "fields": [
        {"name": "uuid", "type": "STRING"}
      ]}]
    CONFIG

    mock(driver.instance).insert("yourproject_id", "yourdataset_id", "foo", [expected], instance_of(Fluent::BigQuery::RecordSchema), nil)

    driver.run do
      driver.feed('tag', Fluent::EventTime.now, input)
    end
  end

  # Happy path: one fed record results in one insert_all_table_data call whose
  # row JSON includes the record; the mocked response reports no insert errors.
  def test_write
    entry = {a: "b"}
    driver = create_driver

    stub_writer do |writer|
      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
      }, {}) do
        s = stub!
        s.insert_errors { nil }
        s
      end
    end

    driver.run do
      driver.feed("tag", Time.now.to_i, {"a" => "b"})
    end
  end

  # A Google::Apis::ServerError carrying any of these status codes must
  # surface from the driver run as Fluent::BigQuery::RetryableError.
  def test_write_with_retryable_error
    [500, 502, 503, 504].each do |status_code|
      driver = create_driver(CONFIG_WITH_SECONDARY)

      entry = {a: "b"}
      stub_writer do |writer|
        mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
          rows: [{json: hash_including(entry)}],
          skip_invalid_rows: false,
          ignore_unknown_values: false
        }, {}) do
          raise Google::Apis::ServerError.new("error", status_code: status_code)
        end
      end

      assert_raise(Fluent::BigQuery::RetryableError) do
        driver.run do
          driver.feed("tag", Time.now.to_i, {"a" => "b"})
        end
      end
    end
  end

  # A ServerError with status 501 and reason "invalid" must surface from
  # #write as Fluent::BigQuery::UnRetryableError, and the plugin's retry state
  # must report a secondary_transition_at within 0.1s of the current time.
  def test_write_with_not_retryable_error
    driver = create_driver(CONFIG_WITH_SECONDARY)

    entry = {a: "b"}
    stub_writer do |writer|
      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
      }, {}) do
        ex = Google::Apis::ServerError.new("error", status_code: 501)
        # Give this one exception instance the reason the test is about.
        def ex.reason
          "invalid"
        end
        raise ex
      end
    end

    driver.instance_start
    begin
      # The chunk is built by hand and passed straight to #write instead of
      # going through driver.run.
      tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
      metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
      chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
        c.append([driver.instance.format(tag, time, record)])
      end
      assert_raise Fluent::BigQuery::UnRetryableError do
        driver.instance.write(chunk)
      end
      assert_in_delta Time.now, driver.instance.retry.secondary_transition_at, 0.1
    ensure
      # Shut the instance down even when an assertion above fails, so a
      # failing run does not leave a started plugin instance behind.
      driver.instance_shutdown
    end
  end

  # With <buffer created_at> and "table foo_${created_at}", the record's
  # created_at value must be substituted into the table id used for the insert.
  def test_write_with_row_based_table_id_formatting
    created_at = Time.local(2014, 8, 20, 9, 0, 0).strftime("%Y_%m_%d")
    row = {json: {a: "b", created_at: created_at}}

    driver = create_driver(<<-CONFIG)
      <buffer created_at>
      </buffer>
      table foo_${created_at}
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      schema [
        {"name": "time", "type": "INTEGER"}
      ]
    CONFIG

    stub_writer do |writer|
      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
        rows: [row],
        skip_invalid_rows: false,
        ignore_unknown_values: false
      }, {}) { stub!.insert_errors { nil } }
    end

    driver.run do
      driver.feed("tag", Time.now.to_i, {"a" => "b", "created_at" => created_at})
    end
  end

  # auto_create_table: the insert is mocked to fail with a 404 "Not found:
  # Table" ClientError; the plugin is then expected to call insert_table with
  # the table reference and the loaded schema, and the run is expected to end
  # in a RuntimeError.
  def test_auto_create_table_by_bigquery_api
    # Truncate to whole seconds before deriving the event time and the record.
    now = Time.at(Time.now.to_i)
    message = {
      "time" => now.to_i,
      "request" => {
        "vhost" => "bar",
        "path" => "/path/to/baz",
        "method" => "GET",
        "protocol" => "HTTP/1.0",
        "agent" => "libwww",
        "referer" => "http://referer.example",
        "time" => (now - 1).to_f,
        "bot_access" => true,
        "loginsession" => false,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.168.1.1",
        "user" => "nagachika",
      },
      "response" => {
        "status" => 200,
        "bytes" => 72,
      },
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
        time_format %s
        time_key time
      </inject>

      auto_create_table true
      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
    CONFIG

    stub_writer do |writer|
      body = {
        rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      # Replace the writer's sleep with a no-op double so the test does not wait.
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)

      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
          fields: driver.instance.instance_variable_get(:@table_schema).to_a,
        },
      }, {})
    end

    assert_raise(RuntimeError) do
      driver.run do
        driver.feed("tag", Fluent::EventTime.from_time(now), message)
      end
    end
  end

  # Same flow as the plain auto-create test, with time_partitioning_* options
  # set: the insert_table definition must additionally carry the
  # time_partitioning hash (type upcased, expiration of "1h" as 3600000 ms).
  def test_auto_create_partitioned_table_by_bigquery_api
    message = access_log_row(Time.now)

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      time_format %s
      time_field time

      auto_create_table true
      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}

      time_partitioning_type day
      time_partitioning_field time
      time_partitioning_expiration 1h
    CONFIG

    stub_writer do |writer|
      body = {
        rows: [message],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      # Replace the writer's sleep with a no-op double so the test does not wait.
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)

      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
          fields: driver.instance.instance_variable_get(:@table_schema).to_a,
        },
        time_partitioning: {
          type: 'DAY',
          field: 'time',
          expiration_ms: 3600000,
        },
      }, {})
    end

    assert_raise(RuntimeError) do
      driver.run do
        driver.feed("tag", Fluent::EventTime.now, message[:json])
      end
    end
  end

  # Same flow as the partitioned test, with clustering_fields set as well: the
  # insert_table definition must carry both the time_partitioning and the
  # clustering hashes.
  def test_auto_create_clustered_table_by_bigquery_api
    message = access_log_row(Time.now)

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      time_format %s
      time_field time

      auto_create_table true
      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}

      time_partitioning_type day
      time_partitioning_field time
      time_partitioning_expiration 1h
      time_partitioning_require_partition_filter true

      clustering_fields [
        "time",
        "vhost"
      ]
    CONFIG

    stub_writer do |writer|
      body = {
        rows: [message],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      # Replace the writer's sleep with a no-op double so the test does not wait.
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)

      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
          fields: driver.instance.instance_variable_get(:@table_schema).to_a,
        },
        time_partitioning: {
          type: 'DAY',
          field: 'time',
          expiration_ms: 3600000,
        },
        clustering: {
          fields: [
            'time',
            'vhost',
          ],
        },
      }, {})
    end

    assert_raise(RuntimeError) do
      driver.run do
        driver.feed("tag", Fluent::EventTime.now, message[:json])
      end
    end
  end

  private

  # Builds the symbol-keyed {json: {...}} access-log row shared by the
  # partitioned and clustered auto-create tests, with timestamps derived
  # from +now+.
  def access_log_row(now)
    {
      json: {
        time: now.to_i,
        request: {
          vhost: "bar",
          path: "/path/to/baz",
          method: "GET",
          protocol: "HTTP/1.0",
          agent: "libwww",
          referer: "http://referer.example",
          time: (now - 1).to_f,
          bot_access: true,
          loginsession: false,
        },
        remote: {
          host: "remote.example",
          ip: "192.168.1.1",
          user: "nagachika",
        },
        response: {
          status: 200,
          bytes: 72,
        },
      }
    }
  end
end