fluent-plugin-bigquery-test 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,348 @@
+ require 'helper'
+
+ class BigQueryLoadOutputTest < Test::Unit::TestCase
+   def setup
+     Fluent::Test.setup
+   end
+
+   SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
+   CONFIG = %[
+     table foo
+     email foo@bar.example
+     private_key_path /path/to/key
+     project yourproject_id
+     dataset yourdataset_id
+
+     <buffer>
+       @type memory
+     </buffer>
+
+     <inject>
+       time_format %s
+       time_key time
+     </inject>
+
+     schema_path #{SCHEMA_PATH}
+     wait_job_interval 0.1
+   ]
+
+   API_SCOPE = "https://www.googleapis.com/auth/bigquery"
+
+   def create_driver(conf = CONFIG)
+     Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryLoadOutput).configure(conf)
+   end
+
+   def stub_writer(stub_auth: true)
+     stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
+       stub(writer).get_auth { nil } if stub_auth
+       yield writer
+       writer
+     end
+   end
+
+   def test_write
+     response_stub = stub!
+
+     driver = create_driver
+     stub_writer do |writer|
+       mock(writer).fetch_load_job(is_a(Fluent::BigQuery::Writer::JobReference)) { response_stub }
+       mock(writer).commit_load_job(is_a(String), response_stub)
+
+       mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+       mock(writer.client).insert_job('yourproject_id', {
+         configuration: {
+           load: {
+             destination_table: {
+               project_id: 'yourproject_id',
+               dataset_id: 'yourdataset_id',
+               table_id: 'foo',
+             },
+             write_disposition: "WRITE_APPEND",
+             source_format: "NEWLINE_DELIMITED_JSON",
+             ignore_unknown_values: false,
+             max_bad_records: 0,
+           }
+         }
+       }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+         stub!.job_reference.stub!.job_id { "dummy_job_id" }
+       end
+     end
+
+     driver.run do
+       driver.feed("tag", Time.now.to_i, {"a" => "b"})
+     end
+   end
+
+   def test_write_with_prevent_duplicate_load
+     driver = create_driver(<<-CONFIG)
+       table foo
+       email foo@bar.example
+       private_key_path /path/to/key
+       project yourproject_id
+       dataset yourdataset_id
+
+       <buffer>
+         @type memory
+       </buffer>
+
+       <inject>
+         time_format %s
+         time_key time
+       </inject>
+
+       schema_path #{SCHEMA_PATH}
+       prevent_duplicate_load true
+     CONFIG
+
+     response_stub = stub!
+     stub_writer do |writer|
+       mock(writer).fetch_load_job(is_a(Fluent::BigQuery::Writer::JobReference)) { response_stub }
+       mock(writer).commit_load_job(is_a(String), response_stub)
+
+       mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+       mock(writer.client).insert_job('yourproject_id', {
+         configuration: {
+           load: {
+             destination_table: {
+               project_id: 'yourproject_id',
+               dataset_id: 'yourdataset_id',
+               table_id: 'foo',
+             },
+             write_disposition: "WRITE_APPEND",
+             source_format: "NEWLINE_DELIMITED_JSON",
+             ignore_unknown_values: false,
+             max_bad_records: 0,
+           },
+         },
+         job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/}},
+       }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+         stub!.job_reference.stub!.job_id { "dummy_job_id" }
+       end
+     end
+
+     driver.run do
+       driver.feed("tag", Time.now.to_i, {"a" => "b"})
+     end
+   end
+
+   def test_write_with_retryable_error
+     driver = create_driver
+
+     driver.instance_start
+     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
+     metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
+     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
+       c.append([driver.instance.format(tag, time, record)])
+     end
+
+     stub_writer do |writer|
+       mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+       mock(writer.client).insert_job('yourproject_id', {
+         configuration: {
+           load: {
+             destination_table: {
+               project_id: 'yourproject_id',
+               dataset_id: 'yourdataset_id',
+               table_id: 'foo',
+             },
+             write_disposition: "WRITE_APPEND",
+             source_format: "NEWLINE_DELIMITED_JSON",
+             ignore_unknown_values: false,
+             max_bad_records: 0,
+           }
+         }
+       }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+         stub!.job_reference.stub!.job_id { "dummy_job_id" }
+       end
+
+       mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+         stub! do |s|
+           s.id { 'dummy_job_id' }
+           s.configuration.stub! do |_s|
+             _s.load.stub! do |__s|
+               __s.destination_table.stub! do |___s|
+                 ___s.project_id { 'yourproject_id' }
+                 ___s.dataset_id { 'yourdataset_id' }
+                 ___s.table_id { 'foo' }
+               end
+             end
+           end
+           s.status.stub! do |_s|
+             _s.state { 'DONE' }
+             _s.errors { [] }
+             _s.error_result.stub! do |__s|
+               __s.message { 'error' }
+               __s.reason { 'backendError' }
+             end
+           end
+         end
+       end
+     end
+
+     assert_raise Fluent::BigQuery::RetryableError do
+       driver.instance.write(chunk)
+     end
+     driver.instance_shutdown
+   end
+
+   def test_write_with_not_retryable_error
+     driver = create_driver(<<-CONFIG)
+       table foo
+       email foo@bar.example
+       private_key_path /path/to/key
+       project yourproject_id
+       dataset yourdataset_id
+
+       <buffer>
+         @type memory
+       </buffer>
+
+       <inject>
+         time_format %s
+         time_key time
+       </inject>
+
+       schema_path #{SCHEMA_PATH}
+       <secondary>
+         @type file
+         path error
+         utc
+       </secondary>
+     CONFIG
+
+     driver.instance_start
+     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
+     metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
+     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
+       c.append([driver.instance.format(tag, time, record)])
+     end
+
+     stub_writer do |writer|
+       mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') { nil }
+
+       mock(writer.client).insert_job('yourproject_id', {
+         configuration: {
+           load: {
+             destination_table: {
+               project_id: 'yourproject_id',
+               dataset_id: 'yourdataset_id',
+               table_id: 'foo',
+             },
+             write_disposition: "WRITE_APPEND",
+             source_format: "NEWLINE_DELIMITED_JSON",
+             ignore_unknown_values: false,
+             max_bad_records: 0,
+           }
+         }
+       }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+         stub!.job_reference.stub!.job_id { "dummy_job_id" }
+       end
+
+       mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+         stub! do |s|
+           s.id { 'dummy_job_id' }
+           s.configuration.stub! do |_s|
+             _s.load.stub! do |__s|
+               __s.destination_table.stub! do |___s|
+                 ___s.project_id { 'yourproject_id' }
+                 ___s.dataset_id { 'yourdataset_id' }
+                 ___s.table_id { 'foo' }
+               end
+             end
+           end
+           s.status.stub! do |_s|
+             _s.state { 'DONE' }
+             _s.errors { [] }
+             _s.error_result.stub! do |__s|
+               __s.message { 'error' }
+               __s.reason { 'invalid' }
+             end
+           end
+         end
+       end
+     end
+
+     assert_raise Fluent::BigQuery::UnRetryableError do
+       driver.instance.write(chunk)
+     end
+     assert_in_delta driver.instance.retry.secondary_transition_at, Time.now, 0.1
+     driver.instance_shutdown
+   end
+
+   def test_write_with_auto_create_table
+     driver = create_driver(<<-CONFIG)
+       table foo
+       email foo@bar.example
+       private_key_path /path/to/key
+       project yourproject_id
+       dataset yourdataset_id
+
+       <buffer>
+         @type memory
+       </buffer>
+
+       <inject>
+         time_format %s
+         time_key time
+       </inject>
+
+       auto_create_table true
+       schema_path #{SCHEMA_PATH}
+     CONFIG
+
+     schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+     stub_writer do |writer|
+       mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') do
+         raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+       end
+
+       mock(writer.client).insert_job('yourproject_id', {
+         configuration: {
+           load: {
+             destination_table: {
+               project_id: 'yourproject_id',
+               dataset_id: 'yourdataset_id',
+               table_id: 'foo',
+             },
+             write_disposition: "WRITE_APPEND",
+             source_format: "NEWLINE_DELIMITED_JSON",
+             ignore_unknown_values: false,
+             max_bad_records: 0,
+             schema: {
+               fields: schema_fields,
+             },
+           }
+         }
+       }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+         stub!.job_reference.stub!.job_id { "dummy_job_id" }
+       end
+     end
+
+     driver.run do
+       driver.feed("tag", Time.now.to_i, {"a" => "b"})
+     end
+   end
+
+   private
+
+   def create_response_stub(response)
+     case response
+     when Hash
+       root = stub!
+       response.each do |k, v|
+         root.__send__(k) do
+           create_response_stub(v)
+         end
+       end
+       root
+     when Array
+       response.map { |item| create_response_stub(item) }
+     else
+       response
+     end
+   end
+ end
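
None of the load-output tests above touch the network: RR's stub.proxy(Fluent::BigQuery::Writer).new lets the real constructor run, then hands each new instance to a block where get_auth and the Google API client calls are mocked out, and the block returns the instance. A rough, self-contained sketch of that proxy-stub pattern, assuming the rr and test-unit-rr gems are installed (Api and fetch are hypothetical stand-ins, not part of this plugin):

    require 'test/unit'
    require 'test/unit/rr'

    # Hypothetical collaborator standing in for Fluent::BigQuery::Writer.
    class Api
      def fetch
        raise "would hit the network"
      end
    end

    class ProxyStubExampleTest < Test::Unit::TestCase
      def test_proxy_stub
        # Let Api.new run for real, but intercept every instance it returns,
        # mirroring the stub_writer helper above.
        stub.proxy(Api).new.with_any_args do |api|
          stub(api).fetch { "stubbed" }  # swap out the network call
          api                            # the proxy block must return the instance
        end

        assert_equal("stubbed", Api.new.fetch)
      end
    end
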
@@ -0,0 +1,186 @@
+ require 'helper'
+
+ class RecordSchemaTest < Test::Unit::TestCase
+   def base_schema
+     [
+       {
+         "name" => "time",
+         "type" => "TIMESTAMP",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "tty",
+         "type" => "STRING",
+         "mode" => "NULLABLE"
+       },
+       {
+         "name" => "pwd",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "user",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "argv",
+         "type" => "STRING",
+         "mode" => "REPEATED"
+       },
+       {
+         "name" => "utilisation",
+         "type" => "NUMERIC",
+         "mode" => "NULLABLE"
+       }
+     ]
+   end
+
+   def base_schema_with_new_column
+     [
+       {
+         "name" => "time",
+         "type" => "TIMESTAMP",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "tty",
+         "type" => "STRING",
+         "mode" => "NULLABLE"
+       },
+       {
+         "name" => "pwd",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "user",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "argv",
+         "type" => "STRING",
+         "mode" => "REPEATED"
+       },
+       {
+         "name" => "utilisation",
+         "type" => "NUMERIC",
+         "mode" => "NULLABLE"
+       },
+       {
+         "name" => "new_column",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       }
+     ]
+   end
+
+   def base_schema_with_type_changed_column
+     [
+       {
+         "name" => "time",
+         "type" => "INTEGER", # change type
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "tty",
+         "type" => "STRING",
+         "mode" => "NULLABLE"
+       },
+       {
+         "name" => "pwd",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "user",
+         "type" => "STRING",
+         "mode" => "REQUIRED"
+       },
+       {
+         "name" => "argv",
+         "type" => "STRING",
+         "mode" => "REPEATED"
+       },
+       {
+         "name" => "utilisation",
+         "type" => "NUMERIC",
+         "mode" => "NULLABLE"
+       }
+     ]
+   end
+
+   def test_load_schema
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+     assert { Fluent::BigQuery::Helper.deep_stringify_keys(fields.to_a) == base_schema }
+   end
+
+   def test_load_schema_allow_overwrite_with_type_changed_column
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+
+     fields.load_schema(base_schema_with_type_changed_column)
+     assert { Fluent::BigQuery::Helper.deep_stringify_keys(fields.to_a) == base_schema_with_type_changed_column }
+   end
+
+   def test_load_schema_allow_overwrite_with_new_column
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+
+     fields.load_schema(base_schema_with_new_column)
+     assert { Fluent::BigQuery::Helper.deep_stringify_keys(fields.to_a) == base_schema_with_new_column }
+   end
+
+   def test_format_one
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+
+     time = Time.local(2016, 2, 7, 19, 0, 0).utc
+
+     formatted = fields.format_one({
+       "time" => time, "tty" => nil, "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", 42]
+     })
+     assert_equal(
+       formatted,
+       {
+         "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", "42"]
+       }
+     )
+   end
+
+   def test_format_one_convert_array_or_hash_to_json
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+
+     time = Time.local(2016, 2, 7, 19, 0, 0).utc
+
+     formatted = fields.format_one({
+       "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilisation" => "0.837"
+     })
+     assert_equal(
+       formatted,
+       {
+         "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilisation" => "0.837"
+       }
+     )
+   end
+
+   def test_format_one_with_extra_column
+     fields = Fluent::BigQuery::RecordSchema.new("record")
+     fields.load_schema(base_schema)
+
+     time = Time.local(2016, 2, 7, 19, 0, 0).utc
+
+     formatted = fields.format_one({
+       "time" => time, "tty" => nil, "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", 42.195], "extra" => "extra_data"
+     })
+     assert_equal(
+       formatted,
+       {
+         "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "pwd" => "/home", "user" => "joker1007", "argv" => ["foo", "42.195"], "extra" => "extra_data"
+       }
+     )
+   end
+ end
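
Taken together, the format_one assertions above pin down RecordSchema's coercion rules: TIMESTAMP values are rendered through strftime, non-string values in STRING columns are stringified (arrays and hashes via MultiJson.dump), NULLABLE fields that are nil are dropped from the output, and keys absent from the schema pass through unchanged. A minimal sketch of exercising the same API directly, assuming the gem's lib directory is on the load path and that fluent/plugin/bigquery/schema is the file defining RecordSchema:

    require 'fluent/plugin/bigquery/schema'
    require 'multi_json'

    # Two-column schema in the same hash form the tests build by hand.
    schema = Fluent::BigQuery::RecordSchema.new("record")
    schema.load_schema([
      { "name" => "user", "type" => "STRING", "mode" => "REQUIRED" },
      { "name" => "argv", "type" => "STRING", "mode" => "REPEATED" },
    ])

    # A Hash in a STRING column becomes JSON; each element of the
    # REPEATED STRING column is stringified individually.
    row = schema.format_one({ "user" => { name: "joker1007" }, "argv" => ["foo", 42] })
    # expected: {"user" => "{\"name\":\"joker1007\"}", "argv" => ["foo", "42"]}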