fluent-plugin-bigquery-test 2.2.0

@@ -0,0 +1,348 @@
require 'helper'

class BigQueryLoadOutputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")

  CONFIG = %[
    table foo
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    <buffer>
      @type memory
    </buffer>

    <inject>
      time_format %s
      time_key time
    </inject>

    schema_path #{SCHEMA_PATH}
    wait_job_interval 0.1
  ]

  API_SCOPE = "https://www.googleapis.com/auth/bigquery"

  def create_driver(conf = CONFIG)
    Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryLoadOutput).configure(conf)
  end

  def stub_writer(stub_auth: true)
    stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
      stub(writer).get_auth { nil } if stub_auth
      yield writer
      writer
    end
  end

  def test_write
    response_stub = stub!

    driver = create_driver
    stub_writer do |writer|
      mock(writer).fetch_load_job(is_a(Fluent::BigQuery::Writer::JobReference)) { response_stub }
      mock(writer).commit_load_job(is_a(String), response_stub)

      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }

      mock(writer.client).insert_job('yourproject_id', {
        configuration: {
          load: {
            destination_table: {
              project_id: 'yourproject_id',
              dataset_id: 'yourdataset_id',
              table_id: 'foo',
            },
            write_disposition: "WRITE_APPEND",
            source_format: "NEWLINE_DELIMITED_JSON",
            ignore_unknown_values: false,
            max_bad_records: 0,
          }
        }
      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
    end

    driver.run do
      driver.feed("tag", Time.now.to_i, {"a" => "b"})
    end
  end

  def test_write_with_prevent_duplicate_load
    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <buffer>
        @type memory
      </buffer>

      <inject>
        time_format %s
        time_key time
      </inject>

      schema_path #{SCHEMA_PATH}
      prevent_duplicate_load true
    CONFIG

    response_stub = stub!
    stub_writer do |writer|
      mock(writer).fetch_load_job(is_a(Fluent::BigQuery::Writer::JobReference)) { response_stub }
      mock(writer).commit_load_job(is_a(String), response_stub)

      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }

      mock(writer.client).insert_job('yourproject_id', {
        configuration: {
          load: {
            destination_table: {
              project_id: 'yourproject_id',
              dataset_id: 'yourdataset_id',
              table_id: 'foo',
            },
            write_disposition: "WRITE_APPEND",
            source_format: "NEWLINE_DELIMITED_JSON",
            ignore_unknown_values: false,
            max_bad_records: 0,
          },
        },
        job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/ }},
      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
    end

    driver.run do
      driver.feed("tag", Time.now.to_i, {"a" => "b"})
    end
  end
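
  # prevent_duplicate_load makes the plugin supply its own job_reference so
  # BigQuery can deduplicate retried loads: the test above only checks that the
  # job_id matches /fluentd_job_.*/. A sketch of one way such a deterministic
  # ID could be derived (hypothetical helper; the gem's actual derivation may
  # use different inputs and a different digest):
  #
  #   require 'digest'
  #
  #   def job_id_for(chunk_id, dataset, table)
  #     "fluentd_job_#{Digest::SHA1.hexdigest([chunk_id, dataset, table].join(':'))}"
  #   end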

  def test_write_with_retryable_error
    driver = create_driver

    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end

    stub_writer do |writer|
      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }

      mock(writer.client).insert_job('yourproject_id', {
        configuration: {
          load: {
            destination_table: {
              project_id: 'yourproject_id',
              dataset_id: 'yourdataset_id',
              table_id: 'foo',
            },
            write_disposition: "WRITE_APPEND",
            source_format: "NEWLINE_DELIMITED_JSON",
            ignore_unknown_values: false,
            max_bad_records: 0,
          }
        }
      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end

      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {location: nil}) do
        stub! do |s|
          s.id { 'dummy_job_id' }
          s.configuration.stub! do |_s|
            _s.load.stub! do |__s|
              __s.destination_table.stub! do |___s|
                ___s.project_id { 'yourproject_id' }
                ___s.dataset_id { 'yourdataset_id' }
                ___s.table_id { 'foo' }
              end
            end
          end
          s.status.stub! do |_s|
            _s.state { 'DONE' }
            _s.errors { [] }
            _s.error_result.stub! do |__s|
              __s.message { 'error' }
              __s.reason { 'backendError' }
            end
          end
        end
      end
    end

    assert_raise Fluent::BigQuery::RetryableError do
      driver.instance.write(chunk)
    end
    driver.instance_shutdown
  end

  def test_write_with_not_retryable_error
    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <buffer>
        @type memory
      </buffer>

      <inject>
        time_format %s
        time_key time
      </inject>

      schema_path #{SCHEMA_PATH}
      <secondary>
        @type file
        path error
        utc
      </secondary>
    CONFIG

    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end

    stub_writer do |writer|
      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }

      mock(writer.client).insert_job('yourproject_id', {
        configuration: {
          load: {
            destination_table: {
              project_id: 'yourproject_id',
              dataset_id: 'yourdataset_id',
              table_id: 'foo',
            },
            write_disposition: "WRITE_APPEND",
            source_format: "NEWLINE_DELIMITED_JSON",
            ignore_unknown_values: false,
            max_bad_records: 0,
          }
        }
      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end

      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {location: nil}) do
        stub! do |s|
          s.id { 'dummy_job_id' }
          s.configuration.stub! do |_s|
            _s.load.stub! do |__s|
              __s.destination_table.stub! do |___s|
                ___s.project_id { 'yourproject_id' }
                ___s.dataset_id { 'yourdataset_id' }
                ___s.table_id { 'foo' }
              end
            end
          end
          s.status.stub! do |_s|
            _s.state { 'DONE' }
            _s.errors { [] }
            _s.error_result.stub! do |__s|
              __s.message { 'error' }
              __s.reason { 'invalid' }
            end
          end
        end
      end
    end

    assert_raise Fluent::BigQuery::UnRetryableError do
      driver.instance.write(chunk)
    end
    assert_in_delta driver.instance.retry.secondary_transition_at, Time.now, 0.1
    driver.instance_shutdown
  end
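
  # The two error-path tests above turn on the load job's error_result.reason:
  # "backendError" is surfaced as Fluent::BigQuery::RetryableError, while
  # "invalid" is surfaced as Fluent::BigQuery::UnRetryableError and hands the
  # chunk to the <secondary> output. A minimal sketch of that classification
  # (hypothetical helper and reason list, not the plugin's actual code):
  #
  #   RETRYABLE_REASONS = %w[backendError internalError].freeze
  #
  #   def error_class_for(reason)
  #     if RETRYABLE_REASONS.include?(reason)
  #       Fluent::BigQuery::RetryableError
  #     else
  #       Fluent::BigQuery::UnRetryableError
  #     end
  #   end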

  def test_write_with_auto_create_table
    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <buffer>
        @type memory
      </buffer>

      <inject>
        time_format %s
        time_key time
      </inject>

      auto_create_table true
      schema_path #{SCHEMA_PATH}
    CONFIG

    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))

    stub_writer do |writer|
      mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end

      mock(writer.client).insert_job('yourproject_id', {
        configuration: {
          load: {
            destination_table: {
              project_id: 'yourproject_id',
              dataset_id: 'yourdataset_id',
              table_id: 'foo',
            },
            write_disposition: "WRITE_APPEND",
            source_format: "NEWLINE_DELIMITED_JSON",
            ignore_unknown_values: false,
            max_bad_records: 0,
            schema: {
              fields: schema_fields,
            },
          }
        }
      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
    end

    driver.run do
      driver.feed("tag", Time.now.to_i, {"a" => "b"})
    end
  end

  private

  def create_response_stub(response)
    case response
    when Hash
      root = stub!
      response.each do |k, v|
        root.__send__(k) do
          create_response_stub(v)
        end
      end
      root
    when Array
      response.map { |item| create_response_stub(item) }
    else
      response
    end
  end
end
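
For context, the CONFIG constants exercised above mirror how this output would be wired into a real Fluentd pipeline. Here is a minimal sketch of such a configuration; the match pattern, buffer settings, and paths are illustrative, and `bigquery_load` is the type name Fluent::Plugin::BigQueryLoadOutput registers as in fluent-plugin-bigquery 2.x:

  <match bigquery.**>
    @type bigquery_load

    project yourproject_id
    dataset yourdataset_id
    table foo

    email foo@bar.example
    private_key_path /path/to/key
    schema_path /path/to/sudo.schema

    <inject>
      time_format %s
      time_key time
    </inject>

    <buffer>
      @type file
      path /var/log/fluentd/buffer/bigquery_load
      flush_interval 60s
    </buffer>
  </match>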
@@ -0,0 +1,186 @@
require 'helper'

class RecordSchemaTest < Test::Unit::TestCase
  def base_schema
    [
      {
        "name" => "time",
        "type" => "TIMESTAMP",
        "mode" => "REQUIRED"
      },
      {
        "name" => "tty",
        "type" => "STRING",
        "mode" => "NULLABLE"
      },
      {
        "name" => "pwd",
        "type" => "STRING",
        "mode" => "REQUIRED"
      },
      {
        "name" => "user",
        "type" => "STRING",
        "mode" => "REQUIRED"
      },
      {
        "name" => "argv",
        "type" => "STRING",
        "mode" => "REPEATED"
      },
      {
        "name" => "utilisation",
        "type" => "NUMERIC",
        "mode" => "NULLABLE"
      }
    ]
  end

  def base_schema_with_new_column
    [
      {
        "name" => "time",
        "type" => "TIMESTAMP",
        "mode" => "REQUIRED"
      },
      {
        "name" => "tty",
        "type" => "STRING",
        "mode" => "NULLABLE"
      },
      {
        "name" => "pwd",
        "type" => "STRING",
        "mode" => "REQUIRED"
      },
      {
        "name" => "user",
        "type" => "STRING",
        "mode" => "REQUIRED"
      },
      {
        "name" => "argv",
        "type" => "STRING",
        "mode" => "REPEATED"
      },
      {
        "name" => "utilisation",
        "type" => "NUMERIC",
        "mode" => "NULLABLE"
      },
      {
        "name" => "new_column",
        "type" => "STRING",
        "mode" => "REQUIRED"
      }
    ]
  end

  def base_schema_with_type_changed_column
    [
      {
        "name" => "time",
        "type" => "INTEGER", # change type
        "mode" => "REQUIRED"
      },
      {
        "name" => "tty",
        "type" => "STRING",
        "mode" => "NULLABLE"
      },
      {
        "name" => "pwd",
        "type" => "STRING",
        "mode" => "REQUIRED"
      },
      {
        "name" => "user",
        "type" => "STRING",
        "mode" => "REQUIRED"
      },
      {
        "name" => "argv",
        "type" => "STRING",
        "mode" => "REPEATED"
      },
      {
        "name" => "utilisation",
        "type" => "NUMERIC",
        "mode" => "NULLABLE"
      }
    ]
  end

  def test_load_schema
    fields = Fluent::BigQuery::RecordSchema.new("record")
    fields.load_schema(base_schema)
    assert { Fluent::BigQuery::Helper.deep_stringify_keys(fields.to_a) == base_schema }
  end

  def test_load_schema_allow_overwrite_with_type_changed_column
    fields = Fluent::BigQuery::RecordSchema.new("record")
    fields.load_schema(base_schema)

    fields.load_schema(base_schema_with_type_changed_column)
    assert { Fluent::BigQuery::Helper.deep_stringify_keys(fields.to_a) == base_schema_with_type_changed_column }
  end

  def test_load_schema_allow_overwrite_with_new_column
    fields = Fluent::BigQuery::RecordSchema.new("record")
    fields.load_schema(base_schema)

    fields.load_schema(base_schema_with_new_column)
    assert { Fluent::BigQuery::Helper.deep_stringify_keys(fields.to_a) == base_schema_with_new_column }
  end

  def test_format_one
    fields = Fluent::BigQuery::RecordSchema.new("record")
    fields.load_schema(base_schema)

    time = Time.local(2016, 2, 7, 19, 0, 0).utc

    formatted = fields.format_one({
      "time" => time, "tty" => nil, "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", 42]
    })
    assert_equal(
      formatted,
      {
        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", "42"]
      }
    )
  end

  def test_format_one_convert_array_or_hash_to_json
    fields = Fluent::BigQuery::RecordSchema.new("record")
    fields.load_schema(base_schema)

    time = Time.local(2016, 2, 7, 19, 0, 0).utc

    formatted = fields.format_one({
      "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilisation" => "0.837"
    })
    assert_equal(
      formatted,
      {
        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilisation" => "0.837"
      }
    )
  end

  def test_format_one_with_extra_column
    fields = Fluent::BigQuery::RecordSchema.new("record")
    fields.load_schema(base_schema)

    time = Time.local(2016, 2, 7, 19, 0, 0).utc

    formatted = fields.format_one({
      "time" => time, "tty" => nil, "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", 42.195], "extra" => "extra_data"
    })
    assert_equal(
      formatted,
      {
        "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", "42.195"], "extra" => "extra_data"
      }
    )
  end
end
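
For reference, a BigQuery schema file shaped like base_schema is what the schema_path file (testdata/sudo.schema) in the load-output tests would plausibly contain. The actual testdata file is not included in this diff, so treat the following as an illustrative reconstruction from the fields defined above:

  [
    {"name": "time", "type": "TIMESTAMP", "mode": "REQUIRED"},
    {"name": "tty", "type": "STRING", "mode": "NULLABLE"},
    {"name": "pwd", "type": "STRING", "mode": "REQUIRED"},
    {"name": "user", "type": "STRING", "mode": "REQUIRED"},
    {"name": "argv", "type": "STRING", "mode": "REPEATED"},
    {"name": "utilisation", "type": "NUMERIC", "mode": "NULLABLE"}
  ]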