fluent-plugin-bigquery 1.2.0 → 2.0.0.beta
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.travis.yml +2 -9
- data/README.md +68 -65
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +45 -39
- data/lib/fluent/plugin/out_bigquery_base.rb +211 -0
- data/lib/fluent/plugin/out_bigquery_insert.rb +131 -0
- data/lib/fluent/plugin/out_bigquery_load.rb +220 -0
- data/test/helper.rb +3 -1
- data/test/plugin/test_out_bigquery_base.rb +579 -0
- data/test/plugin/test_out_bigquery_insert.rb +420 -0
- data/test/plugin/test_out_bigquery_load.rb +310 -0
- metadata +13 -7
- data/lib/fluent/plugin/out_bigquery.rb +0 -500
- data/test/plugin/test_out_bigquery.rb +0 -1276
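
The headline change in 2.0.0.beta is structural: the monolithic `out_bigquery.rb` (500 lines) and its 1276-line test are removed, and the plugin is split into a shared base class (`out_bigquery_base.rb`) plus two concrete outputs, `out_bigquery_insert.rb` for streaming inserts and `out_bigquery_load.rb` for load jobs, each with its own test file (reproduced below). As a rough sketch pieced together from the test fixtures below (placeholder credentials and project/dataset/table IDs throughout, and assuming the new plugin classes register under the `bigquery_insert` / `bigquery_load` type names), a minimal streaming-insert configuration would look like:

```
<match your.tag.**>
  @type bigquery_insert   # Fluent::Plugin::BigQueryInsertOutput

  # service-account auth, as used in the test fixtures
  email foo@bar.example
  private_key_path /path/to/key

  project yourproject_id
  dataset yourdataset_id
  table foo

  schema [{"name": "time", "type": "INTEGER"}, {"name": "status", "type": "INTEGER"}]
</match>
```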
--- /dev/null
+++ data/test/plugin/test_out_bigquery_insert.rb
@@ -0,0 +1,420 @@
+require 'helper'
+
+class BigQueryInsertOutputTest < Test::Unit::TestCase
+  def setup
+    Fluent::Test.setup
+  end
+
+  CONFIG = %[
+    table foo
+    email foo@bar.example
+    private_key_path /path/to/key
+    project yourproject_id
+    dataset yourdataset_id
+
+    <inject>
+    time_format %s
+    time_key time
+    </inject>
+
+    schema [
+      {"name": "time", "type": "INTEGER"},
+      {"name": "status", "type": "INTEGER"},
+      {"name": "bytes", "type": "INTEGER"},
+      {"name": "vhost", "type": "STRING"},
+      {"name": "path", "type": "STRING"},
+      {"name": "method", "type": "STRING"},
+      {"name": "protocol", "type": "STRING"},
+      {"name": "agent", "type": "STRING"},
+      {"name": "referer", "type": "STRING"},
+      {"name": "remote", "type": "RECORD", "fields": [
+        {"name": "host", "type": "STRING"},
+        {"name": "ip", "type": "STRING"},
+        {"name": "user", "type": "STRING"}
+      ]},
+      {"name": "requesttime", "type": "FLOAT"},
+      {"name": "bot_access", "type": "BOOLEAN"},
+      {"name": "loginsession", "type": "BOOLEAN"}
+    ]
+  ]
+
+  API_SCOPE = "https://www.googleapis.com/auth/bigquery"
+
+  def create_driver(conf = CONFIG)
+    Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryInsertOutput).configure(conf)
+  end
+
+  def stub_writer(stub_auth: true)
+    stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
+      stub(writer).get_auth { nil } if stub_auth
+      yield writer
+      writer
+    end
+  end
+
+  def test__write_with_insert_id
+    now = Time.now.to_i
+    input = {
+      "uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
+    }
+    expected = {
+      insert_id: "9ABFF756-0267-4247-847F-0895B65F0938",
+      json: {
+        uuid: "9ABFF756-0267-4247-847F-0895B65F0938",
+      }
+    }
+
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      insert_id_field uuid
+      schema [{"name": "uuid", "type": "STRING"}]
+    CONFIG
+    mock(driver.instance).insert("yourproject_id", "yourdataset_id", "foo", [expected], instance_of(Fluent::BigQuery::RecordSchema), nil)
+
+    driver.run do
+      driver.feed('tag', now, input)
+    end
+  end
+
+  def test__write_with_nested_insert_id
+    input = {
+      "data" => {
+        "uuid" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
+      },
+    }
+    expected = {
+      insert_id: "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
+      json: {
+        data: {
+          uuid: "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
+        }
+      }
+    }
+
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      insert_id_field $.data.uuid
+      schema [{"name": "data", "type": "RECORD", "fields": [
+        {"name": "uuid", "type": "STRING"}
+      ]}]
+    CONFIG
+
+    mock(driver.instance).insert("yourproject_id", "yourdataset_id", "foo", [expected], instance_of(Fluent::BigQuery::RecordSchema), nil)
+
+    driver.run do
+      driver.feed('tag', Fluent::EventTime.now, input)
+    end
+  end
+
+  def test_write
+    entry = {a: "b"}
+    driver = create_driver
+
+    stub_writer do |writer|
+      mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: hash_including(entry)}], template_suffix: nil)
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+        rows: [{json: hash_including(entry)}],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {}) do
+        s = stub!
+        s.insert_errors { nil }
+        s
+      end
+    end
+
+    driver.run do
+      driver.feed("tag", Time.now.to_i, {"a" => "b"})
+    end
+  end
+
+  def test_write_with_retryable_error
+    data_input = [
+      { "status_code" => 500 },
+      { "status_code" => 502 },
+      { "status_code" => 503 },
+      { "status_code" => 504 },
+    ]
+
+    data_input.each do |d|
+      driver = create_driver(<<-CONFIG)
+        table foo
+        email foo@bar.example
+        private_key_path /path/to/key
+        project yourproject_id
+        dataset yourdataset_id
+
+        <inject>
+        time_format %s
+        time_key time
+        </inject>
+
+        schema [
+          {"name": "time", "type": "INTEGER"},
+          {"name": "status", "type": "INTEGER"},
+          {"name": "bytes", "type": "INTEGER"},
+          {"name": "vhost", "type": "STRING"},
+          {"name": "path", "type": "STRING"},
+          {"name": "method", "type": "STRING"},
+          {"name": "protocol", "type": "STRING"},
+          {"name": "agent", "type": "STRING"},
+          {"name": "referer", "type": "STRING"},
+          {"name": "remote", "type": "RECORD", "fields": [
+            {"name": "host", "type": "STRING"},
+            {"name": "ip", "type": "STRING"},
+            {"name": "user", "type": "STRING"}
+          ]},
+          {"name": "requesttime", "type": "FLOAT"},
+          {"name": "bot_access", "type": "BOOLEAN"},
+          {"name": "loginsession", "type": "BOOLEAN"}
+        ]
+        <secondary>
+          type file
+          path error
+          utc
+        </secondary>
+      CONFIG
+
+      entry = {a: "b"}
+      stub_writer do |writer|
+        mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+          rows: [{json: hash_including(entry)}],
+          skip_invalid_rows: false,
+          ignore_unknown_values: false
+        }, {}) do
+          ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
+          raise ex
+        end
+      end
+
+      assert_raise(Fluent::BigQuery::RetryableError) do
+        driver.run do
+          driver.feed("tag", Time.now.to_i, {"a" => "b"})
+        end
+      end
+    end
+  end
+
+  def test_write_with_not_retryable_error
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      <inject>
+      time_format %s
+      time_key time
+      </inject>
+
+      schema [
+        {"name": "time", "type": "INTEGER"},
+        {"name": "status", "type": "INTEGER"},
+        {"name": "bytes", "type": "INTEGER"},
+        {"name": "vhost", "type": "STRING"},
+        {"name": "path", "type": "STRING"},
+        {"name": "method", "type": "STRING"},
+        {"name": "protocol", "type": "STRING"},
+        {"name": "agent", "type": "STRING"},
+        {"name": "referer", "type": "STRING"},
+        {"name": "remote", "type": "RECORD", "fields": [
+          {"name": "host", "type": "STRING"},
+          {"name": "ip", "type": "STRING"},
+          {"name": "user", "type": "STRING"}
+        ]},
+        {"name": "requesttime", "type": "FLOAT"},
+        {"name": "bot_access", "type": "BOOLEAN"},
+        {"name": "loginsession", "type": "BOOLEAN"}
+      ]
+      <secondary>
+        type file
+        path error
+        utc
+      </secondary>
+    CONFIG
+
+    entry = {a: "b"}
+    stub_writer do |writer|
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+        rows: [{json: hash_including(entry)}],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {}) do
+        ex = Google::Apis::ServerError.new("error", status_code: 501)
+        def ex.reason
+          "invalid"
+        end
+        raise ex
+      end
+    end
+
+    driver.instance_start
+    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
+    metadata = driver.instance.metadata_for_test(tag, time, record)
+    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
+      c.append([driver.instance.format(tag, time, record)])
+    end
+    assert_raise Fluent::BigQuery::UnRetryableError do
+      driver.instance.write(chunk)
+    end
+    assert_in_delta driver.instance.retry.secondary_transition_at, Time.now, 0.1
+    driver.instance_shutdown
+  end
+
+  def test_write_with_row_based_table_id_formatting
+    entry = [
+      {json: {a: "b", created_at: Time.local(2014,8,20,9,0,0).strftime("%Y_%m_%d")}},
+    ]
+    driver = create_driver(<<-CONFIG)
+      <buffer created_at>
+      </buffer>
+      table foo_${created_at}
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      schema [
+        {"name": "time", "type": "INTEGER"}
+      ]
+    CONFIG
+
+    stub_writer do |writer|
+      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
+        rows: [entry[0]],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }, {}) { stub!.insert_errors { nil } }
+    end
+
+    driver.run do
+      driver.feed("tag", Time.now.to_i, {"a" => "b", "created_at" => Time.local(2014,8,20,9,0,0).strftime("%Y_%m_%d")})
+    end
+  end
+
+  def test_auto_create_table_by_bigquery_api
+    now = Time.at(Time.now.to_i)
+    message = {
+      "time" => now.to_i,
+      "request" => {
+        "vhost" => "bar",
+        "path" => "/path/to/baz",
+        "method" => "GET",
+        "protocol" => "HTTP/1.0",
+        "agent" => "libwww",
+        "referer" => "http://referer.example",
+        "time" => (now - 1).to_f,
+        "bot_access" => true,
+        "loginsession" => false,
+      },
+      "remote" => {
+        "host" => "remote.example",
+        "ip" => "192.168.1.1",
+        "user" => "nagachika",
+      },
+      "response" => {
+        "status" => 200,
+        "bytes" => 72,
+      },
+    }
+
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      <inject>
+      time_format %s
+      time_key time
+      </inject>
+
+      auto_create_table true
+      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
+    CONFIG
+
+    stub_writer do |writer|
+      mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}], template_suffix: nil) do
+        raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
+      end
+      mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
+    end
+
+    assert_raise(RuntimeError) do
+      driver.run do
+        driver.feed("tag", Fluent::EventTime.from_time(now), message)
+      end
+    end
+  end
+
+  def test_auto_create_partitioned_table_by_bigquery_api
+    now = Time.now
+    message = {
+      json: {
+        time: now.to_i,
+        request: {
+          vhost: "bar",
+          path: "/path/to/baz",
+          method: "GET",
+          protocol: "HTTP/1.0",
+          agent: "libwww",
+          referer: "http://referer.example",
+          time: (now - 1).to_f,
+          bot_access: true,
+          loginsession: false,
+        },
+        remote: {
+          host: "remote.example",
+          ip: "192.168.1.1",
+          user: "nagachika",
+        },
+        response: {
+          status: 200,
+          bytes: 72,
+        },
+      }
+    }
+
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      time_format %s
+      time_field time
+
+      auto_create_table true
+      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
+
+      time_partitioning_type day
+      time_partitioning_expiration 1h
+    CONFIG
+
+    stub_writer do |writer|
+      mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) do
+        raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
+      end
+      mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
+    end
+
+    assert_raise(RuntimeError) do
+      driver.run do
+        driver.feed("tag", Fluent::EventTime.now, message[:json])
+      end
+    end
+  end
+end
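
Two behaviors worth noting in the insert tests above: `insert_id_field` accepts a record-accessor style path (`$.data.uuid`) for nested keys, and errors from `insert_all_table_data` are classified by HTTP status and reason (500/502/503/504 raise `Fluent::BigQuery::RetryableError`, while a `reason` of `"invalid"` raises `Fluent::BigQuery::UnRetryableError` and transitions the buffer to the `<secondary>` output immediately). A minimal, hypothetical illustration of how such a path can be resolved against a record (`resolve_record_path` is invented for this sketch, not the plugin's own implementation):

```ruby
# Hypothetical helper: resolve a "$.a.b" style path against a Hash record.
def resolve_record_path(record, path)
  keys = path.sub(/\A\$\./, "").split(".")
  keys.reduce(record) { |node, key| node.is_a?(Hash) ? node[key] : nil }
end

record = { "data" => { "uuid" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A" } }
resolve_record_path(record, "$.data.uuid")
# => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A"
```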
--- /dev/null
+++ data/test/plugin/test_out_bigquery_load.rb
@@ -0,0 +1,310 @@
+require 'helper'
+
+class BigQueryLoadOutputTest < Test::Unit::TestCase
+  def setup
+    Fluent::Test.setup
+  end
+
+  SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
+  CONFIG = %[
+    table foo
+    email foo@bar.example
+    private_key_path /path/to/key
+    project yourproject_id
+    dataset yourdataset_id
+
+    <buffer>
+      @type memory
+    </buffer>
+
+    <inject>
+    time_format %s
+    time_key time
+    </inject>
+
+    schema_path #{SCHEMA_PATH}
+    wait_job_interval 0.1
+  ]
+
+  API_SCOPE = "https://www.googleapis.com/auth/bigquery"
+
+  def create_driver(conf = CONFIG)
+    Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryLoadOutput).configure(conf)
+  end
+
+  def stub_writer(stub_auth: true)
+    stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
+      stub(writer).get_auth { nil } if stub_auth
+      yield writer
+      writer
+    end
+  end
+
+  def test_write
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+    response_stub = stub!
+
+    driver = create_driver
+    stub_writer do |writer|
+      mock(writer).fetch_load_job(is_a(Fluent::BigQuery::Writer::JobReference)) { response_stub }
+      mock(writer).commit_load_job(is_a(String), response_stub)
+
+      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+      mock(writer.client).insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            schema: {
+              fields: schema_fields,
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON",
+            ignore_unknown_values: false,
+            max_bad_records: 0,
+          }
+        }
+      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+        stub!.job_reference.stub!.job_id { "dummy_job_id" }
+      end
+    end
+
+    driver.run do
+      driver.feed("tag", Time.now.to_i, {"a" => "b"})
+    end
+  end
+
+  def test_write_with_prevent_duplicate_load
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      <buffer>
+        @type memory
+      </buffer>
+
+      <inject>
+      time_format %s
+      time_key time
+      </inject>
+
+      schema_path #{SCHEMA_PATH}
+      prevent_duplicate_load true
+    CONFIG
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+    response_stub = stub!
+    stub_writer do |writer|
+      mock(writer).fetch_load_job(is_a(Fluent::BigQuery::Writer::JobReference)) { response_stub }
+      mock(writer).commit_load_job(is_a(String), response_stub)
+
+      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+      mock(writer.client).insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            schema: {
+              fields: schema_fields,
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON",
+            ignore_unknown_values: false,
+            max_bad_records: 0,
+          },
+        },
+        job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/ }},
+      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+        stub!.job_reference.stub!.job_id { "dummy_job_id" }
+      end
+    end
+
+    driver.run do
+      driver.feed("tag", Time.now.to_i, {"a" => "b"})
+    end
+  end
+
+  def test_write_with_retryable_error
+    driver = create_driver
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+    driver.instance_start
+    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
+    metadata = driver.instance.metadata_for_test(tag, time, record)
+    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
+      c.append([driver.instance.format(tag, time, record)])
+    end
+
+    stub_writer do |writer|
+      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+      mock(writer.client).insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            schema: {
+              fields: schema_fields,
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON",
+            ignore_unknown_values: false,
+            max_bad_records: 0,
+          }
+        }
+      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+        stub!.job_reference.stub!.job_id { "dummy_job_id" }
+      end
+
+      mock(writer.client).get_job('yourproject_id', 'dummy_job_id') do
+        stub! do |s|
+          s.id { 'dummy_job_id' }
+          s.configuration.stub! do |_s|
+            _s.load.stub! do |__s|
+              __s.destination_table.stub! do |___s|
+                ___s.project_id { 'yourproject_id' }
+                ___s.dataset_id { 'yourdataset_id' }
+                ___s.table_id { 'foo' }
+              end
+            end
+          end
+          s.status.stub! do |_s|
+            _s.state { 'DONE' }
+            _s.errors { [] }
+            _s.error_result.stub! do |__s|
+              __s.message { 'error' }
+              __s.reason { 'backendError' }
+            end
+          end
+        end
+      end
+    end
+
+    assert_raise Fluent::BigQuery::RetryableError do
+      driver.instance.write(chunk)
+    end
+    driver.instance_shutdown
+  end
+
+  def test_write_with_not_retryable_error
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      <buffer>
+        @type memory
+      </buffer>
+
+      <inject>
+      time_format %s
+      time_key time
+      </inject>
+
+      schema_path #{SCHEMA_PATH}
+      <secondary>
+        @type file
+        path error
+        utc
+      </secondary>
+    CONFIG
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+    driver.instance_start
+    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
+    metadata = driver.instance.metadata_for_test(tag, time, record)
+    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
+      c.append([driver.instance.format(tag, time, record)])
+    end
+
+    stub_writer do |writer|
+      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+      mock(writer.client).insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            schema: {
+              fields: schema_fields,
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON",
+            ignore_unknown_values: false,
+            max_bad_records: 0,
+          }
+        }
+      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+        stub!.job_reference.stub!.job_id { "dummy_job_id" }
+      end
+
+      mock(writer.client).get_job('yourproject_id', 'dummy_job_id') do
+        stub! do |s|
+          s.id { 'dummy_job_id' }
+          s.configuration.stub! do |_s|
+            _s.load.stub! do |__s|
+              __s.destination_table.stub! do |___s|
+                ___s.project_id { 'yourproject_id' }
+                ___s.dataset_id { 'yourdataset_id' }
+                ___s.table_id { 'foo' }
+              end
+            end
+          end
+          s.status.stub! do |_s|
+            _s.state { 'DONE' }
+            _s.errors { [] }
+            _s.error_result.stub! do |__s|
+              __s.message { 'error' }
+              __s.reason { 'invalid' }
+            end
+          end
+        end
+      end
+    end
+
+    assert_raise Fluent::BigQuery::UnRetryableError do
+      driver.instance.write(chunk)
+    end
+    assert_in_delta driver.instance.retry.secondary_transition_at, Time.now, 0.1
+    driver.instance_shutdown
+  end
+
+  private
+
+  def create_response_stub(response)
+    case response
+    when Hash
+      root = stub!
+      response.each do |k, v|
+        root.__send__(k) do
+          create_response_stub(v)
+        end
+      end
+      root
+    when Array
+      response.map { |item| create_response_stub(item) }
+    else
+      response
+    end
+  end
+end
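
The load tests above show the job lifecycle: buffered data is uploaded as NEWLINE_DELIMITED_JSON via `insert_job`, the job is polled with `get_job` until its state is `DONE`, and the result is classified the same way as inserts (`backendError` retries, `invalid` goes to `<secondary>`). With `prevent_duplicate_load true`, the plugin supplies its own `job_reference` whose `job_id` matches `fluentd_job_.*`, so a retried upload reuses the same job instead of loading the data twice. As a sketch assembled from the test CONFIG (placeholder values throughout, and assuming the plugin registers as `bigquery_load`):

```
<match your.tag.**>
  @type bigquery_load

  email foo@bar.example
  private_key_path /path/to/key

  project yourproject_id
  dataset yourdataset_id
  table foo

  schema_path /path/to/sudo.schema   # JSON schema file, as in the tests
  prevent_duplicate_load true

  <buffer>
    @type memory   # as in the test fixture
  </buffer>
</match>
```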