fluent-plugin-bigquery-test 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,544 @@
1
+ require 'helper'
2
+
3
+ class BigQueryInsertOutputTest < Test::Unit::TestCase
4
# test-unit hook that runs before each test; it only delegates to
# Fluent::Test.setup.
def setup
  Fluent::Test.setup
end
7
+
8
# Default plugin configuration used by create_driver when a test does not
# pass its own: target table/credentials, an <inject> section that adds a
# "time" key formatted with %s, and an inline JSON schema.
# Frozen so that no test can accidentally mutate the text shared by all of them.
CONFIG = %[
table foo
email foo@bar.example
private_key_path /path/to/key
project yourproject_id
dataset yourdataset_id

<inject>
time_format %s
time_key time
</inject>

schema [
{"name": "time", "type": "INTEGER"},
{"name": "status", "type": "INTEGER"},
{"name": "bytes", "type": "INTEGER"},
{"name": "vhost", "type": "STRING"},
{"name": "path", "type": "STRING"},
{"name": "method", "type": "STRING"},
{"name": "protocol", "type": "STRING"},
{"name": "agent", "type": "STRING"},
{"name": "referer", "type": "STRING"},
{"name": "remote", "type": "RECORD", "fields": [
{"name": "host", "type": "STRING"},
{"name": "ip", "type": "STRING"},
{"name": "user", "type": "STRING"}
]},
{"name": "requesttime", "type": "FLOAT"},
{"name": "bot_access", "type": "BOOLEAN"},
{"name": "loginsession", "type": "BOOLEAN"}
]
].freeze

# BigQuery API scope URL. Not referenced by any test visible in this file.
API_SCOPE = "https://www.googleapis.com/auth/bigquery".freeze
42
+
43
# Wraps Fluent::Plugin::BigQueryInsertOutput in an output test driver and
# configures it.
#
# @param conf [String] Fluentd configuration text (defaults to CONFIG)
# @return the value returned by the driver's `configure` call, which the
#   tests in this file use as the driver
def create_driver(conf = CONFIG)
  plugin_class = Fluent::Plugin::BigQueryInsertOutput
  Fluent::Test::Driver::Output.new(plugin_class).configure(conf)
end
46
+
47
# Hooks Fluent::BigQuery::Writer.new (RR `stub.proxy`, any arguments) so the
# caller's block receives every writer that gets constructed and can attach
# mocks to it. The writer itself is what the hooked `new` hands back.
#
# @param stub_auth [Boolean] when true, the writer's `get_auth` is stubbed to
#   return nil
# @yield [writer] the freshly constructed writer
def stub_writer(stub_auth: true)
  stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |new_writer|
    if stub_auth
      stub(new_writer).get_auth { nil }
    end
    yield new_writer
    new_writer
  end
end
54
+
55
# With `insert_id_field uuid`, feeding a record that has a "uuid" key must
# lead to `insert` being called with a row whose insert_id is that uuid.
def test__write_with_insert_id
  event_time = Time.now.to_i
  uuid = "9ABFF756-0267-4247-847F-0895B65F0938"
  record = { "uuid" => uuid }
  expected_row = {
    insert_id: uuid,
    json: { uuid: uuid },
  }

  conf = <<-CONFIG
table foo
email foo@bar.example
private_key_path /path/to/key
project yourproject_id
dataset yourdataset_id

insert_id_field uuid
schema [{"name": "uuid", "type": "STRING"}]
  CONFIG
  driver = create_driver(conf)

  # Expect exactly the row above, for the configured project/dataset/table.
  mock(driver.instance).insert(
    "yourproject_id", "yourdataset_id", "foo",
    [expected_row], instance_of(Fluent::BigQuery::RecordSchema), nil
  )

  driver.run { driver.feed('tag', event_time, record) }
end
83
+
84
# Same as the flat insert_id test, but the id lives at `$.data.uuid`
# inside a nested RECORD field.
def test__write_with_nested_insert_id
  uuid = "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A"
  record = { "data" => { "uuid" => uuid } }
  expected_row = {
    insert_id: uuid,
    json: { data: { uuid: uuid } },
  }

  conf = <<-CONFIG
table foo
email foo@bar.example
private_key_path /path/to/key
project yourproject_id
dataset yourdataset_id

insert_id_field $.data.uuid
schema [{"name": "data", "type": "RECORD", "fields": [
{"name": "uuid", "type": "STRING"}
]}]
  CONFIG
  driver = create_driver(conf)

  mock(driver.instance).insert(
    "yourproject_id", "yourdataset_id", "foo",
    [expected_row], instance_of(Fluent::BigQuery::RecordSchema), nil
  )

  driver.run { driver.feed('tag', Fluent::EventTime.now, record) }
end
118
+
119
# Happy path with the default CONFIG: one fed record must produce a single
# insert_all_table_data call whose row JSON includes {a: "b"}.
def test_write
  expected_fields = {a: "b"}
  driver = create_driver

  stub_writer do |writer|
    request_body = {
      rows: [{json: hash_including(expected_fields)}],
      skip_invalid_rows: false,
      ignore_unknown_values: false
    }
    mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', request_body, {}) do
      # Stand-in response object whose insert_errors is nil.
      response = stub!
      response.insert_errors { nil }
      response
    end
  end

  driver.run { driver.feed("tag", Time.now.to_i, {"a" => "b"}) }
end
139
+
140
# For each of the listed server status codes, a Google::Apis::ServerError
# raised by insert_all_table_data must surface from the driver run as
# Fluent::BigQuery::RetryableError.
def test_write_with_retryable_error
  # Identical for every status code, so the text is built once.
  conf = <<-CONFIG
table foo
email foo@bar.example
private_key_path /path/to/key
project yourproject_id
dataset yourdataset_id

<inject>
time_format %s
time_key time
</inject>

schema [
{"name": "time", "type": "INTEGER"},
{"name": "status", "type": "INTEGER"},
{"name": "bytes", "type": "INTEGER"},
{"name": "vhost", "type": "STRING"},
{"name": "path", "type": "STRING"},
{"name": "method", "type": "STRING"},
{"name": "protocol", "type": "STRING"},
{"name": "agent", "type": "STRING"},
{"name": "referer", "type": "STRING"},
{"name": "remote", "type": "RECORD", "fields": [
{"name": "host", "type": "STRING"},
{"name": "ip", "type": "STRING"},
{"name": "user", "type": "STRING"}
]},
{"name": "requesttime", "type": "FLOAT"},
{"name": "bot_access", "type": "BOOLEAN"},
{"name": "loginsession", "type": "BOOLEAN"}
]
<secondary>
type file
path error
utc
</secondary>
  CONFIG
  expected_fields = {a: "b"}

  [500, 502, 503, 504].each do |status_code|
    # A fresh driver and writer hook per status code.
    driver = create_driver(conf)

    stub_writer do |writer|
      request_body = {
        rows: [{json: hash_including(expected_fields)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
      }
      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', request_body, {}) do
        raise Google::Apis::ServerError.new("error", status_code: status_code)
      end
    end

    assert_raise(Fluent::BigQuery::RetryableError) do
      driver.run { driver.feed("tag", Time.now.to_i, {"a" => "b"}) }
    end
  end
end
206
+
207
# A Google::Apis::ServerError with status 501 and reason "invalid" must make
# `write` raise Fluent::BigQuery::UnRetryableError, and the plugin's retry
# state must record a secondary transition time of roughly "now".
#
# The plugin is started and stopped by hand (instead of driver.run) so that
# `write` can be called directly with a hand-built chunk.
def test_write_with_not_retryable_error
  driver = create_driver(<<-CONFIG)
table foo
email foo@bar.example
private_key_path /path/to/key
project yourproject_id
dataset yourdataset_id

<inject>
time_format %s
time_key time
</inject>

schema [
{"name": "time", "type": "INTEGER"},
{"name": "status", "type": "INTEGER"},
{"name": "bytes", "type": "INTEGER"},
{"name": "vhost", "type": "STRING"},
{"name": "path", "type": "STRING"},
{"name": "method", "type": "STRING"},
{"name": "protocol", "type": "STRING"},
{"name": "agent", "type": "STRING"},
{"name": "referer", "type": "STRING"},
{"name": "remote", "type": "RECORD", "fields": [
{"name": "host", "type": "STRING"},
{"name": "ip", "type": "STRING"},
{"name": "user", "type": "STRING"}
]},
{"name": "requesttime", "type": "FLOAT"},
{"name": "bot_access", "type": "BOOLEAN"},
{"name": "loginsession", "type": "BOOLEAN"}
]
<secondary>
type file
path error
utc
</secondary>
  CONFIG

  entry = {a: "b"}
  stub_writer do |writer|
    mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
      rows: [{json: hash_including(entry)}],
      skip_invalid_rows: false,
      ignore_unknown_values: false
    }, {}) do
      ex = Google::Apis::ServerError.new("error", status_code: 501)
      # Give this one exception instance a `reason` of "invalid".
      def ex.reason
        "invalid"
      end
      raise ex
    end
  end

  driver.instance_start
  begin
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end
    assert_raise Fluent::BigQuery::UnRetryableError do
      driver.instance.write(chunk)
    end
    # assert_in_delta takes (expected, actual, delta): "now" is the expectation,
    # the recorded transition time is the value under test.
    assert_in_delta Time.now, driver.instance.retry.secondary_transition_at, 0.1
  ensure
    # Always stop the plugin, even when an assertion above fails, so a failing
    # run does not leave a started instance behind for later tests.
    driver.instance_shutdown
  end
end
273
+
274
# The table name contains a ${created_at} placeholder backed by a
# <buffer created_at> section; a record with created_at "2014_08_20" must be
# inserted into table "foo_2014_08_20".
def test_write_with_row_based_table_id_formatting
  created_at = Time.local(2014, 8, 20, 9, 0, 0).strftime("%Y_%m_%d")
  expected_row = {json: {a: "b", created_at: created_at}}

  conf = <<-CONFIG
<buffer created_at>
</buffer>
table foo_${created_at}
email foo@bar.example
private_key_path /path/to/key
project yourproject_id
dataset yourdataset_id

schema [
{"name": "time", "type": "INTEGER"}
]
  CONFIG
  driver = create_driver(conf)

  stub_writer do |writer|
    request_body = {
      rows: [expected_row],
      skip_invalid_rows: false,
      ignore_unknown_values: false
    }
    mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo_2014_08_20', request_body, {}) { stub!.insert_errors { nil } }
  end

  driver.run do
    driver.feed("tag", Time.now.to_i, {"a" => "b", "created_at" => created_at})
  end
end
304
+
305
# With auto_create_table enabled, an insert that fails with a 404
# "table not found" ClientError must trigger insert_table with the table id
# and the schema loaded from testdata/apache.schema.
def test_auto_create_table_by_bigquery_api
  # Truncated to whole seconds.
  event_time = Time.at(Time.now.to_i)
  record = {
    "time" => event_time.to_i,
    "request" => {
      "vhost" => "bar",
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/1.0",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "time" => (event_time - 1).to_f,
      "bot_access" => true,
      "loginsession" => false,
    },
    "remote" => {
      "host" => "remote.example",
      "ip" => "192.168.1.1",
      "user" => "nagachika",
    },
    "response" => {
      "status" => 200,
      "bytes" => 72,
    },
  }

  conf = <<-CONFIG
table foo
email foo@bar.example
private_key_path /path/to/key
project yourproject_id
dataset yourdataset_id

<inject>
time_format %s
time_key time
</inject>

auto_create_table true
schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
  CONFIG
  driver = create_driver(conf)

  stub_writer do |writer|
    insert_request = {
      rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(record)}],
      skip_invalid_rows: false,
      ignore_unknown_values: false,
    }
    # Every insert attempt reports that the table does not exist.
    mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', insert_request, {}) do
      raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
    end.at_least(1)
    # Replace the writer's sleep with a no-op returning nil.
    mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)

    table_definition = {
      table_reference: {
        table_id: 'foo',
      },
      schema: {
        fields: driver.instance.instance_variable_get(:@table_schema).to_a,
      },
    }
    mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', table_definition, {})
  end

  # NOTE(review): presumably a RuntimeError because the insert mock keeps
  # raising 404 after the table is created — confirm.
  assert_raise(RuntimeError) do
    driver.run { driver.feed("tag", Fluent::EventTime.from_time(event_time), record) }
  end
end
374
+
375
# Like the plain auto-create test, but with time partitioning configured:
# insert_table must additionally receive a time_partitioning section
# (type DAY, field "time", expiration of 1h in milliseconds).
def test_auto_create_partitioned_table_by_bigquery_api
  current = Time.now
  expected_row = {
    json: {
      time: current.to_i,
      request: {
        vhost: "bar",
        path: "/path/to/baz",
        method: "GET",
        protocol: "HTTP/1.0",
        agent: "libwww",
        referer: "http://referer.example",
        time: (current - 1).to_f,
        bot_access: true,
        loginsession: false,
      },
      remote: {
        host: "remote.example",
        ip: "192.168.1.1",
        user: "nagachika",
      },
      response: {
        status: 200,
        bytes: 72,
      },
    }
  }

  conf = <<-CONFIG
table foo
email foo@bar.example
private_key_path /path/to/key
project yourproject_id
dataset yourdataset_id

time_format %s
time_field time

auto_create_table true
schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}

time_partitioning_type day
time_partitioning_field time
time_partitioning_expiration 1h
  CONFIG
  driver = create_driver(conf)

  stub_writer do |writer|
    insert_request = {
      rows: [expected_row],
      skip_invalid_rows: false,
      ignore_unknown_values: false,
    }
    # Every insert attempt reports that the table does not exist.
    mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', insert_request, {}) do
      raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
    end.at_least(1)
    # Replace the writer's sleep with a no-op returning nil.
    mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)

    table_definition = {
      table_reference: {
        table_id: 'foo',
      },
      schema: {
        fields: driver.instance.instance_variable_get(:@table_schema).to_a,
      },
      time_partitioning: {
        type: 'DAY',
        field: 'time',
        expiration_ms: 3_600_000,
      },
    }
    mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', table_definition, {})
  end

  # NOTE(review): presumably a RuntimeError because the insert mock keeps
  # raising 404 after the table is created — confirm.
  assert_raise(RuntimeError) do
    driver.run { driver.feed("tag", Fluent::EventTime.now, expected_row[:json]) }
  end
end
453
+
454
# Like the partitioned auto-create test, with clustering_fields configured:
# insert_table must additionally receive a clustering section listing
# "time" and "vhost".
def test_auto_create_clustered_table_by_bigquery_api
  current = Time.now
  expected_row = {
    json: {
      time: current.to_i,
      request: {
        vhost: "bar",
        path: "/path/to/baz",
        method: "GET",
        protocol: "HTTP/1.0",
        agent: "libwww",
        referer: "http://referer.example",
        time: (current - 1).to_f,
        bot_access: true,
        loginsession: false,
      },
      remote: {
        host: "remote.example",
        ip: "192.168.1.1",
        user: "nagachika",
      },
      response: {
        status: 200,
        bytes: 72,
      },
    }
  }

  conf = <<-CONFIG
table foo
email foo@bar.example
private_key_path /path/to/key
project yourproject_id
dataset yourdataset_id

time_format %s
time_field time

auto_create_table true
schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}

time_partitioning_type day
time_partitioning_field time
time_partitioning_expiration 1h
time_partitioning_require_partition_filter true

clustering_fields [
"time",
"vhost"
]
  CONFIG
  driver = create_driver(conf)

  stub_writer do |writer|
    insert_request = {
      rows: [expected_row],
      skip_invalid_rows: false,
      ignore_unknown_values: false,
    }
    # Every insert attempt reports that the table does not exist.
    mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', insert_request, {}) do
      raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
    end.at_least(1)
    # Replace the writer's sleep with a no-op returning nil.
    mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)

    # NOTE(review): the config enables time_partitioning_require_partition_filter,
    # but this expected payload has no matching key — confirm that is intended.
    table_definition = {
      table_reference: {
        table_id: 'foo',
      },
      schema: {
        fields: driver.instance.instance_variable_get(:@table_schema).to_a,
      },
      time_partitioning: {
        type: 'DAY',
        field: 'time',
        expiration_ms: 3_600_000,
      },
      clustering: {
        fields: [
          'time',
          'vhost',
        ],
      },
    }
    mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', table_definition, {})
  end

  # NOTE(review): presumably a RuntimeError because the insert mock keeps
  # raising 404 after the table is created — confirm.
  assert_raise(RuntimeError) do
    driver.run { driver.feed("tag", Fluent::EventTime.now, expected_row[:json]) }
  end
end
544
+ end