fluent-plugin-bigquery 0.2.16 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +76 -3
- data/Rakefile +1 -0
- data/fluent-plugin-bigquery.gemspec +3 -5
- data/lib/fluent/plugin/bigquery/schema.rb +221 -0
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +289 -0
- data/lib/fluent/plugin/out_bigquery.rb +159 -373
- data/test/helper.rb +1 -0
- data/test/plugin/test_out_bigquery.rb +470 -142
- data/test/plugin/test_record_schema.rb +173 -0
- metadata +17 -21
data/test/helper.rb
CHANGED
@@ -34,20 +34,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
34
34
|
Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf)
|
35
35
|
end
|
36
36
|
|
37
|
-
def
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
stub(driver.instance).client { client }
|
42
|
-
client
|
43
|
-
end
|
44
|
-
|
45
|
-
def mock_client(driver)
|
46
|
-
mock(client = Object.new) do |expect|
|
47
|
-
yield expect
|
48
|
-
end
|
49
|
-
stub(driver.instance).client { client }
|
50
|
-
client
|
37
|
+
def stub_writer(driver)
|
38
|
+
writer = driver.instance.writer
|
39
|
+
stub(writer).get_auth { nil }
|
40
|
+
writer
|
51
41
|
end
|
52
42
|
|
53
43
|
def test_configure_table
|
@@ -76,21 +66,25 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
76
66
|
issuer: 'foo@bar.example',
|
77
67
|
signing_key: key) { authorization }
|
78
68
|
|
79
|
-
mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args
|
80
|
-
mock
|
81
|
-
|
69
|
+
mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
|
70
|
+
mock(cl).__send__(:authorization=, authorization) {}
|
71
|
+
cl
|
72
|
+
end
|
82
73
|
|
83
|
-
driver = create_driver
|
84
|
-
driver.instance.
|
74
|
+
driver = create_driver
|
75
|
+
mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
|
76
|
+
driver.instance.writer
|
77
|
+
assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
|
85
78
|
end
|
86
79
|
|
87
80
|
def test_configure_auth_compute_engine
|
88
81
|
authorization = Object.new
|
89
82
|
mock(Google::Auth::GCECredentials).new { authorization }
|
90
83
|
|
91
|
-
mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args
|
92
|
-
mock
|
93
|
-
|
84
|
+
mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
|
85
|
+
mock(cl).__send__(:authorization=, authorization) {}
|
86
|
+
cl
|
87
|
+
end
|
94
88
|
|
95
89
|
driver = create_driver(%[
|
96
90
|
table foo
|
@@ -99,7 +93,9 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
99
93
|
dataset yourdataset_id
|
100
94
|
field_integer time,status,bytes
|
101
95
|
])
|
102
|
-
driver.instance.
|
96
|
+
mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
|
97
|
+
driver.instance.writer
|
98
|
+
assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
|
103
99
|
end
|
104
100
|
|
105
101
|
def test_configure_auth_json_key_as_file
|
@@ -107,9 +103,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
107
103
|
authorization = Object.new
|
108
104
|
mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: File.open(json_key_path), scope: API_SCOPE) { authorization }
|
109
105
|
|
110
|
-
mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args
|
111
|
-
mock
|
112
|
-
|
106
|
+
mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
|
107
|
+
mock(cl).__send__(:authorization=, authorization) {}
|
108
|
+
cl
|
109
|
+
end
|
113
110
|
|
114
111
|
driver = create_driver(%[
|
115
112
|
table foo
|
@@ -119,7 +116,32 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
119
116
|
dataset yourdataset_id
|
120
117
|
field_integer time,status,bytes
|
121
118
|
])
|
122
|
-
driver.instance.
|
119
|
+
mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
|
120
|
+
driver.instance.writer
|
121
|
+
assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
|
122
|
+
end
|
123
|
+
|
124
|
+
def test_configure_auth_json_key_as_file_raise_permission_error
|
125
|
+
json_key_path = 'test/plugin/testdata/json_key.json'
|
126
|
+
json_key_path_dir = File.dirname(json_key_path)
|
127
|
+
|
128
|
+
begin
|
129
|
+
File.chmod(0000, json_key_path_dir)
|
130
|
+
|
131
|
+
driver = create_driver(%[
|
132
|
+
table foo
|
133
|
+
auth_method json_key
|
134
|
+
json_key #{json_key_path}
|
135
|
+
project yourproject_id
|
136
|
+
dataset yourdataset_id
|
137
|
+
field_integer time,status,bytes
|
138
|
+
])
|
139
|
+
assert_raises(Errno::EACCES) do
|
140
|
+
driver.instance.writer.client
|
141
|
+
end
|
142
|
+
ensure
|
143
|
+
File.chmod(0755, json_key_path_dir)
|
144
|
+
end
|
123
145
|
end
|
124
146
|
|
125
147
|
def test_configure_auth_json_key_as_string
|
@@ -129,9 +151,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
129
151
|
authorization = Object.new
|
130
152
|
mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: json_key_io, scope: API_SCOPE) { authorization }
|
131
153
|
|
132
|
-
mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args
|
133
|
-
mock
|
134
|
-
|
154
|
+
mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
|
155
|
+
mock(cl).__send__(:authorization=, authorization) {}
|
156
|
+
cl
|
157
|
+
end
|
135
158
|
|
136
159
|
driver = create_driver(%[
|
137
160
|
table foo
|
@@ -141,16 +164,19 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
141
164
|
dataset yourdataset_id
|
142
165
|
field_integer time,status,bytes
|
143
166
|
])
|
144
|
-
driver.instance.
|
167
|
+
mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
|
168
|
+
driver.instance.writer
|
169
|
+
assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
|
145
170
|
end
|
146
171
|
|
147
172
|
def test_configure_auth_application_default
|
148
173
|
authorization = Object.new
|
149
174
|
mock(Google::Auth).get_application_default([API_SCOPE]) { authorization }
|
150
175
|
|
151
|
-
mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args
|
152
|
-
mock
|
153
|
-
|
176
|
+
mock.proxy(Google::Apis::BigqueryV2::BigqueryService).new.with_any_args do |cl|
|
177
|
+
mock(cl).__send__(:authorization=, authorization) {}
|
178
|
+
cl
|
179
|
+
end
|
154
180
|
|
155
181
|
driver = create_driver(%[
|
156
182
|
table foo
|
@@ -159,7 +185,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
159
185
|
dataset yourdataset_id
|
160
186
|
field_integer time,status,bytes
|
161
187
|
])
|
162
|
-
|
188
|
+
|
189
|
+
mock.proxy(Fluent::BigQuery::Writer).new(duck_type(:info, :error, :warn), driver.instance.auth_method, is_a(Hash))
|
190
|
+
driver.instance.writer
|
191
|
+
assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
|
163
192
|
end
|
164
193
|
|
165
194
|
def test_configure_fieldname_stripped
|
@@ -275,9 +304,15 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
275
304
|
"requesttime" => (now - 1).to_f.to_s.to_f,
|
276
305
|
"bot_access" => true,
|
277
306
|
"loginsession" => false,
|
307
|
+
"something-else" => "would be ignored",
|
308
|
+
"yet-another" => {
|
309
|
+
"foo" => "bar",
|
310
|
+
"baz" => 1,
|
311
|
+
},
|
278
312
|
"remote" => {
|
279
313
|
"host" => "remote.example",
|
280
314
|
"ip" => "192.0.2.1",
|
315
|
+
"port" => 12345,
|
281
316
|
"user" => "tagomoris",
|
282
317
|
}
|
283
318
|
}
|
@@ -327,7 +362,6 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
327
362
|
time_field time
|
328
363
|
#{type} time
|
329
364
|
CONFIG
|
330
|
-
stub_client(driver)
|
331
365
|
|
332
366
|
driver.instance.start
|
333
367
|
buf = driver.instance.format_stream("my.tag", [input])
|
@@ -371,7 +405,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
371
405
|
field_integer metadata.time
|
372
406
|
field_string metadata.node,log
|
373
407
|
CONFIG
|
374
|
-
|
408
|
+
|
375
409
|
driver.instance.start
|
376
410
|
buf = driver.instance.format_stream("my.tag", [input])
|
377
411
|
driver.instance.shutdown
|
@@ -429,12 +463,18 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
429
463
|
"remote" => {
|
430
464
|
"host" => "remote.example",
|
431
465
|
"ip" => "192.0.2.1",
|
466
|
+
"port" => 12345,
|
432
467
|
"user" => "tagomoris",
|
433
468
|
},
|
434
469
|
"response" => {
|
435
470
|
"status" => 1,
|
436
471
|
"bytes" => 3,
|
437
472
|
},
|
473
|
+
"something-else" => "would be ignored",
|
474
|
+
"yet-another" => {
|
475
|
+
"foo" => "bar",
|
476
|
+
"baz" => 1,
|
477
|
+
},
|
438
478
|
}
|
439
479
|
}
|
440
480
|
|
@@ -531,16 +571,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
531
571
|
fetch_schema true
|
532
572
|
field_integer time
|
533
573
|
CONFIG
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
fields_stub = stub!
|
539
|
-
s.schema { schema_stub }
|
540
|
-
schema_stub.fields { fields_stub }
|
541
|
-
fields_stub.as_json { sudo_schema_response.deep_stringify_keys["schema"]["fields"] }
|
542
|
-
s
|
543
|
-
}
|
574
|
+
|
575
|
+
writer = stub_writer(driver)
|
576
|
+
mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
|
577
|
+
sudo_schema_response.deep_stringify_keys["schema"]["fields"]
|
544
578
|
end
|
545
579
|
driver.instance.start
|
546
580
|
buf = driver.instance.format_stream("my.tag", [input])
|
@@ -603,16 +637,10 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
603
637
|
fetch_schema true
|
604
638
|
field_integer time
|
605
639
|
CONFIG
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
fields_stub = stub!
|
611
|
-
s.schema { schema_stub }
|
612
|
-
schema_stub.fields { fields_stub }
|
613
|
-
fields_stub.as_json { sudo_schema_response.deep_stringify_keys["schema"]["fields"] }
|
614
|
-
s
|
615
|
-
}
|
640
|
+
|
641
|
+
writer = stub_writer(driver)
|
642
|
+
mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', now.strftime('foo_%Y_%m_%d')) do
|
643
|
+
sudo_schema_response.deep_stringify_keys["schema"]["fields"]
|
616
644
|
end
|
617
645
|
driver.instance.start
|
618
646
|
buf = driver.instance.format_stream("my.tag", [input])
|
@@ -731,6 +759,8 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
731
759
|
dataset yourdataset_id
|
732
760
|
|
733
761
|
field_string uuid
|
762
|
+
|
763
|
+
buffer_type memory
|
734
764
|
CONFIG
|
735
765
|
driver.instance.start
|
736
766
|
buf = driver.instance.format_stream("my.tag", [input])
|
@@ -739,38 +769,6 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
739
769
|
assert_equal expected, buf
|
740
770
|
end
|
741
771
|
|
742
|
-
def test_empty_value_in_required
|
743
|
-
now = Time.now
|
744
|
-
input = [
|
745
|
-
now,
|
746
|
-
{
|
747
|
-
"tty" => "pts/1",
|
748
|
-
"pwd" => "/home/yugui",
|
749
|
-
"user" => nil,
|
750
|
-
"argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
|
751
|
-
}
|
752
|
-
]
|
753
|
-
|
754
|
-
driver = create_driver(<<-CONFIG)
|
755
|
-
table foo
|
756
|
-
email foo@bar.example
|
757
|
-
private_key_path /path/to/key
|
758
|
-
project yourproject_id
|
759
|
-
dataset yourdataset_id
|
760
|
-
|
761
|
-
time_format %s
|
762
|
-
time_field time
|
763
|
-
|
764
|
-
schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
|
765
|
-
field_integer time
|
766
|
-
CONFIG
|
767
|
-
driver.instance.start
|
768
|
-
assert_raises(RuntimeError.new("Required field user cannot be null")) do
|
769
|
-
driver.instance.format_stream("my.tag", [input])
|
770
|
-
end
|
771
|
-
driver.instance.shutdown
|
772
|
-
end
|
773
|
-
|
774
772
|
def test_replace_record_key
|
775
773
|
now = Time.now
|
776
774
|
input = [
|
@@ -869,13 +867,119 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
869
867
|
|
870
868
|
def test_write
|
871
869
|
entry = {json: {a: "b"}}, {json: {b: "c"}}
|
872
|
-
driver = create_driver
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
870
|
+
driver = create_driver
|
871
|
+
|
872
|
+
writer = stub_writer(driver)
|
873
|
+
mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', entry, hash_including(
|
874
|
+
skip_invalid_rows: false,
|
875
|
+
ignore_unknown_values: false
|
876
|
+
))
|
877
|
+
mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
|
878
|
+
rows: entry,
|
879
|
+
skip_invalid_rows: false,
|
880
|
+
ignore_unknown_values: false
|
881
|
+
}, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
|
882
|
+
s = stub!
|
883
|
+
s.insert_errors { nil }
|
884
|
+
s
|
885
|
+
end
|
886
|
+
|
887
|
+
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
888
|
+
entry.each do |e|
|
889
|
+
chunk << e.to_msgpack
|
890
|
+
end
|
891
|
+
|
892
|
+
driver.instance.start
|
893
|
+
driver.instance.write(chunk)
|
894
|
+
driver.instance.shutdown
|
895
|
+
end
|
896
|
+
|
897
|
+
def test_write_with_retryable_error
|
898
|
+
entry = {json: {a: "b"}}, {json: {b: "c"}}
|
899
|
+
driver = create_driver(<<-CONFIG)
|
900
|
+
table foo
|
901
|
+
email foo@bar.example
|
902
|
+
private_key_path /path/to/key
|
903
|
+
project yourproject_id
|
904
|
+
dataset yourdataset_id
|
905
|
+
|
906
|
+
time_format %s
|
907
|
+
time_field time
|
908
|
+
|
909
|
+
field_integer time,status,bytes
|
910
|
+
field_string vhost,path,method,protocol,agent,referer,remote.host,remote.ip,remote.user
|
911
|
+
field_float requesttime
|
912
|
+
field_boolean bot_access,loginsession
|
913
|
+
<secondary>
|
914
|
+
type file
|
915
|
+
path error
|
916
|
+
utc
|
917
|
+
</secondary>
|
918
|
+
CONFIG
|
919
|
+
|
920
|
+
writer = stub_writer(driver)
|
921
|
+
mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
|
922
|
+
rows: entry,
|
923
|
+
skip_invalid_rows: false,
|
924
|
+
ignore_unknown_values: false
|
925
|
+
}, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
|
926
|
+
ex = Google::Apis::ServerError.new("error")
|
927
|
+
def ex.reason
|
928
|
+
"backendError"
|
929
|
+
end
|
930
|
+
raise ex
|
931
|
+
end
|
932
|
+
|
933
|
+
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
934
|
+
entry.each do |e|
|
935
|
+
chunk << e.to_msgpack
|
936
|
+
end
|
937
|
+
|
938
|
+
driver.instance.start
|
939
|
+
assert_raise Fluent::BigQuery::Writer::RetryableError do
|
940
|
+
driver.instance.write(chunk)
|
941
|
+
end
|
942
|
+
driver.instance.shutdown
|
943
|
+
end
|
944
|
+
|
945
|
+
def test_write_with_not_retryable_error
|
946
|
+
entry = {json: {a: "b"}}, {json: {b: "c"}}
|
947
|
+
driver = create_driver(<<-CONFIG)
|
948
|
+
table foo
|
949
|
+
email foo@bar.example
|
950
|
+
private_key_path /path/to/key
|
951
|
+
project yourproject_id
|
952
|
+
dataset yourdataset_id
|
953
|
+
|
954
|
+
time_format %s
|
955
|
+
time_field time
|
956
|
+
|
957
|
+
field_integer time,status,bytes
|
958
|
+
field_string vhost,path,method,protocol,agent,referer,remote.host,remote.ip,remote.user
|
959
|
+
field_float requesttime
|
960
|
+
field_boolean bot_access,loginsession
|
961
|
+
<secondary>
|
962
|
+
type file
|
963
|
+
path error
|
964
|
+
utc
|
965
|
+
</secondary>
|
966
|
+
CONFIG
|
967
|
+
|
968
|
+
writer = stub_writer(driver)
|
969
|
+
mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
|
970
|
+
rows: entry,
|
971
|
+
skip_invalid_rows: false,
|
972
|
+
ignore_unknown_values: false
|
973
|
+
}, {options: {timeout_sec: nil, open_timeout_sec: 60}}) do
|
974
|
+
ex = Google::Apis::ServerError.new("error")
|
975
|
+
def ex.reason
|
976
|
+
"invalid"
|
977
|
+
end
|
978
|
+
raise ex
|
877
979
|
end
|
878
980
|
|
981
|
+
mock(driver.instance).flush_secondary(is_a(Fluent::Output))
|
982
|
+
|
879
983
|
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
880
984
|
entry.each do |e|
|
881
985
|
chunk << e.to_msgpack
|
@@ -902,41 +1006,271 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
902
1006
|
|
903
1007
|
schema_path #{schema_path}
|
904
1008
|
field_integer time
|
1009
|
+
|
1010
|
+
buffer_type memory
|
905
1011
|
CONFIG
|
906
1012
|
schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
|
907
1013
|
h[0][:type] = "INTEGER"
|
908
1014
|
h[0][:mode] = "NULLABLE"
|
909
1015
|
end
|
910
1016
|
|
1017
|
+
writer = stub_writer(driver)
|
911
1018
|
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
912
1019
|
io = StringIO.new("hello")
|
913
1020
|
mock(driver.instance).create_upload_source(chunk).yields(io)
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
1021
|
+
mock(writer).wait_load_job("yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
|
1022
|
+
mock(writer.client).insert_job('yourproject_id', {
|
1023
|
+
configuration: {
|
1024
|
+
load: {
|
1025
|
+
destination_table: {
|
1026
|
+
project_id: 'yourproject_id',
|
1027
|
+
dataset_id: 'yourdataset_id',
|
1028
|
+
table_id: 'foo',
|
1029
|
+
},
|
1030
|
+
schema: {
|
1031
|
+
fields: schema_fields,
|
1032
|
+
},
|
1033
|
+
write_disposition: "WRITE_APPEND",
|
1034
|
+
source_format: "NEWLINE_DELIMITED_JSON",
|
1035
|
+
ignore_unknown_values: false,
|
1036
|
+
max_bad_records: 0,
|
1037
|
+
}
|
1038
|
+
}
|
1039
|
+
}, {upload_source: io, content_type: "application/octet-stream", options: {timeout_sec: nil, open_timeout_sec: 60}}) do
|
1040
|
+
s = stub!
|
1041
|
+
job_reference_stub = stub!
|
1042
|
+
s.job_reference { job_reference_stub }
|
1043
|
+
job_reference_stub.job_id { "dummy_job_id" }
|
1044
|
+
s
|
1045
|
+
end
|
1046
|
+
|
1047
|
+
entry.each do |e|
|
1048
|
+
chunk << MultiJson.dump(e) + "\n"
|
1049
|
+
end
|
1050
|
+
|
1051
|
+
driver.instance.start
|
1052
|
+
driver.instance.write(chunk)
|
1053
|
+
driver.instance.shutdown
|
1054
|
+
end
|
1055
|
+
|
1056
|
+
def test_write_for_load_with_prevent_duplicate_load
|
1057
|
+
schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
|
1058
|
+
entry = {a: "b"}, {b: "c"}
|
1059
|
+
driver = create_driver(<<-CONFIG)
|
1060
|
+
method load
|
1061
|
+
table foo
|
1062
|
+
email foo@bar.example
|
1063
|
+
private_key_path /path/to/key
|
1064
|
+
project yourproject_id
|
1065
|
+
dataset yourdataset_id
|
1066
|
+
|
1067
|
+
time_format %s
|
1068
|
+
time_field time
|
1069
|
+
|
1070
|
+
schema_path #{schema_path}
|
1071
|
+
field_integer time
|
1072
|
+
prevent_duplicate_load true
|
1073
|
+
|
1074
|
+
buffer_type memory
|
1075
|
+
CONFIG
|
1076
|
+
schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
|
1077
|
+
h[0][:type] = "INTEGER"
|
1078
|
+
h[0][:mode] = "NULLABLE"
|
1079
|
+
end
|
1080
|
+
|
1081
|
+
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
1082
|
+
io = StringIO.new("hello")
|
1083
|
+
mock(driver.instance).create_upload_source(chunk).yields(io)
|
1084
|
+
mock.proxy(driver.instance).create_job_id(duck_type(:unique_id), "yourdataset_id", "foo", driver.instance.instance_variable_get(:@fields).to_a, 0, false)
|
1085
|
+
writer = stub_writer(driver)
|
1086
|
+
mock(writer).wait_load_job("yourproject_id", "yourdataset_id", "dummy_job_id", "foo") { nil }
|
1087
|
+
mock(writer.client).insert_job('yourproject_id', {
|
1088
|
+
configuration: {
|
1089
|
+
load: {
|
1090
|
+
destination_table: {
|
1091
|
+
project_id: 'yourproject_id',
|
1092
|
+
dataset_id: 'yourdataset_id',
|
1093
|
+
table_id: 'foo',
|
1094
|
+
},
|
1095
|
+
schema: {
|
1096
|
+
fields: schema_fields,
|
1097
|
+
},
|
1098
|
+
write_disposition: "WRITE_APPEND",
|
1099
|
+
source_format: "NEWLINE_DELIMITED_JSON",
|
1100
|
+
ignore_unknown_values: false,
|
1101
|
+
max_bad_records: 0,
|
1102
|
+
},
|
1103
|
+
},
|
1104
|
+
job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/}} ,
|
1105
|
+
}, {upload_source: io, content_type: "application/octet-stream", options: {timeout_sec: nil, open_timeout_sec: 60}}) do
|
1106
|
+
s = stub!
|
1107
|
+
job_reference_stub = stub!
|
1108
|
+
s.job_reference { job_reference_stub }
|
1109
|
+
job_reference_stub.job_id { "dummy_job_id" }
|
1110
|
+
s
|
1111
|
+
end
|
1112
|
+
|
1113
|
+
entry.each do |e|
|
1114
|
+
chunk << MultiJson.dump(e) + "\n"
|
1115
|
+
end
|
1116
|
+
|
1117
|
+
driver.instance.start
|
1118
|
+
driver.instance.write(chunk)
|
1119
|
+
driver.instance.shutdown
|
1120
|
+
end
|
1121
|
+
|
1122
|
+
def test_write_for_load_with_retryable_error
|
1123
|
+
schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
|
1124
|
+
entry = {a: "b"}, {b: "c"}
|
1125
|
+
driver = create_driver(<<-CONFIG)
|
1126
|
+
method load
|
1127
|
+
table foo
|
1128
|
+
email foo@bar.example
|
1129
|
+
private_key_path /path/to/key
|
1130
|
+
project yourproject_id
|
1131
|
+
dataset yourdataset_id
|
1132
|
+
|
1133
|
+
time_format %s
|
1134
|
+
time_field time
|
1135
|
+
|
1136
|
+
schema_path #{schema_path}
|
1137
|
+
field_integer time
|
1138
|
+
|
1139
|
+
buffer_type memory
|
1140
|
+
CONFIG
|
1141
|
+
schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
|
1142
|
+
h[0][:type] = "INTEGER"
|
1143
|
+
h[0][:mode] = "NULLABLE"
|
1144
|
+
end
|
1145
|
+
|
1146
|
+
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
1147
|
+
io = StringIO.new("hello")
|
1148
|
+
mock(driver.instance).create_upload_source(chunk).yields(io)
|
1149
|
+
writer = stub_writer(driver)
|
1150
|
+
mock(writer.client).insert_job('yourproject_id', {
|
1151
|
+
configuration: {
|
1152
|
+
load: {
|
1153
|
+
destination_table: {
|
1154
|
+
project_id: 'yourproject_id',
|
1155
|
+
dataset_id: 'yourdataset_id',
|
1156
|
+
table_id: 'foo',
|
1157
|
+
},
|
1158
|
+
schema: {
|
1159
|
+
fields: schema_fields,
|
1160
|
+
},
|
1161
|
+
write_disposition: "WRITE_APPEND",
|
1162
|
+
source_format: "NEWLINE_DELIMITED_JSON",
|
1163
|
+
ignore_unknown_values: false,
|
1164
|
+
max_bad_records: 0,
|
1165
|
+
}
|
1166
|
+
}
|
1167
|
+
}, {upload_source: io, content_type: "application/octet-stream", options: {timeout_sec: nil, open_timeout_sec: 60}}) do
|
1168
|
+
s = stub!
|
1169
|
+
job_reference_stub = stub!
|
1170
|
+
s.job_reference { job_reference_stub }
|
1171
|
+
job_reference_stub.job_id { "dummy_job_id" }
|
1172
|
+
s
|
1173
|
+
end
|
1174
|
+
|
1175
|
+
mock(writer.client).get_job('yourproject_id', 'dummy_job_id') do
|
1176
|
+
s = stub!
|
1177
|
+
status_stub = stub!
|
1178
|
+
error_result = stub!
|
1179
|
+
|
1180
|
+
s.status { status_stub }
|
1181
|
+
status_stub.state { "DONE" }
|
1182
|
+
status_stub.error_result { error_result }
|
1183
|
+
status_stub.errors { nil }
|
1184
|
+
error_result.message { "error" }
|
1185
|
+
error_result.reason { "backendError" }
|
1186
|
+
s
|
1187
|
+
end
|
1188
|
+
|
1189
|
+
entry.each do |e|
|
1190
|
+
chunk << MultiJson.dump(e) + "\n"
|
1191
|
+
end
|
1192
|
+
|
1193
|
+
driver.instance.start
|
1194
|
+
assert_raise Fluent::BigQuery::Writer::RetryableError do
|
1195
|
+
driver.instance.write(chunk)
|
1196
|
+
end
|
1197
|
+
driver.instance.shutdown
|
1198
|
+
end
|
1199
|
+
|
1200
|
+
def test_write_for_load_with_not_retryable_error
|
1201
|
+
schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
|
1202
|
+
entry = {a: "b"}, {b: "c"}
|
1203
|
+
driver = create_driver(<<-CONFIG)
|
1204
|
+
method load
|
1205
|
+
table foo
|
1206
|
+
email foo@bar.example
|
1207
|
+
private_key_path /path/to/key
|
1208
|
+
project yourproject_id
|
1209
|
+
dataset yourdataset_id
|
1210
|
+
|
1211
|
+
time_format %s
|
1212
|
+
time_field time
|
1213
|
+
|
1214
|
+
schema_path #{schema_path}
|
1215
|
+
field_integer time
|
1216
|
+
|
1217
|
+
buffer_type memory
|
1218
|
+
<secondary>
|
1219
|
+
type file
|
1220
|
+
path error
|
1221
|
+
utc
|
1222
|
+
</secondary>
|
1223
|
+
CONFIG
|
1224
|
+
schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
|
1225
|
+
h[0][:type] = "INTEGER"
|
1226
|
+
h[0][:mode] = "NULLABLE"
|
1227
|
+
end
|
1228
|
+
|
1229
|
+
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
1230
|
+
io = StringIO.new("hello")
|
1231
|
+
mock(driver.instance).create_upload_source(chunk).yields(io)
|
1232
|
+
writer = stub_writer(driver)
|
1233
|
+
mock(writer.client).insert_job('yourproject_id', {
|
1234
|
+
configuration: {
|
1235
|
+
load: {
|
1236
|
+
destination_table: {
|
1237
|
+
project_id: 'yourproject_id',
|
1238
|
+
dataset_id: 'yourdataset_id',
|
1239
|
+
table_id: 'foo',
|
1240
|
+
},
|
1241
|
+
schema: {
|
1242
|
+
fields: schema_fields,
|
1243
|
+
},
|
1244
|
+
write_disposition: "WRITE_APPEND",
|
1245
|
+
source_format: "NEWLINE_DELIMITED_JSON",
|
1246
|
+
ignore_unknown_values: false,
|
1247
|
+
max_bad_records: 0,
|
929
1248
|
}
|
930
|
-
}, {upload_source: io, content_type: "application/octet-stream"}) {
|
931
|
-
s = stub!
|
932
|
-
status_stub = stub!
|
933
|
-
s.status { status_stub }
|
934
|
-
status_stub.state { "DONE" }
|
935
|
-
status_stub.error_result { nil }
|
936
|
-
s
|
937
1249
|
}
|
1250
|
+
}, {upload_source: io, content_type: "application/octet-stream", options: {timeout_sec: nil, open_timeout_sec: 60}}) do
|
1251
|
+
s = stub!
|
1252
|
+
job_reference_stub = stub!
|
1253
|
+
s.job_reference { job_reference_stub }
|
1254
|
+
job_reference_stub.job_id { "dummy_job_id" }
|
1255
|
+
s
|
938
1256
|
end
|
939
1257
|
|
1258
|
+
mock(writer.client).get_job('yourproject_id', 'dummy_job_id') do
|
1259
|
+
s = stub!
|
1260
|
+
status_stub = stub!
|
1261
|
+
error_result = stub!
|
1262
|
+
|
1263
|
+
s.status { status_stub }
|
1264
|
+
status_stub.state { "DONE" }
|
1265
|
+
status_stub.error_result { error_result }
|
1266
|
+
status_stub.errors { nil }
|
1267
|
+
error_result.message { "error" }
|
1268
|
+
error_result.reason { "invalid" }
|
1269
|
+
s
|
1270
|
+
end
|
1271
|
+
|
1272
|
+
mock(driver.instance).flush_secondary(is_a(Fluent::Output))
|
1273
|
+
|
940
1274
|
entry.each do |e|
|
941
1275
|
chunk << MultiJson.dump(e) + "\n"
|
942
1276
|
end
|
@@ -966,15 +1300,19 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
966
1300
|
field_float requesttime
|
967
1301
|
field_boolean bot_access,loginsession
|
968
1302
|
CONFIG
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
1303
|
+
|
1304
|
+
writer = stub_writer(driver)
|
1305
|
+
mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
|
1306
|
+
rows: [entry[0]],
|
1307
|
+
skip_invalid_rows: false,
|
1308
|
+
ignore_unknown_values: false
|
1309
|
+
}, {options: {timeout_sec: nil, open_timeout_sec: 60}}) { stub!.insert_errors { nil } }
|
1310
|
+
|
1311
|
+
mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo_2014_08_21', {
|
1312
|
+
rows: [entry[1]],
|
1313
|
+
skip_invalid_rows: false,
|
1314
|
+
ignore_unknown_values: false
|
1315
|
+
}, {options: {timeout_sec: nil, open_timeout_sec: 60}}) { stub!.insert_errors { nil } }
|
978
1316
|
|
979
1317
|
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
980
1318
|
entry.each do |object|
|
@@ -1085,23 +1423,13 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
1085
1423
|
auto_create_table true
|
1086
1424
|
schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
|
1087
1425
|
CONFIG
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
table_reference: {
|
1096
|
-
table_id: 'foo',
|
1097
|
-
},
|
1098
|
-
schema: {
|
1099
|
-
fields: JSON.parse(File.read(File.join(File.dirname(__FILE__), "testdata", "apache.schema"))).map(&:deep_symbolize_keys),
|
1100
|
-
}
|
1101
|
-
}, {}) {
|
1102
|
-
stub!
|
1103
|
-
}
|
1104
|
-
end
|
1426
|
+
writer = stub_writer(driver)
|
1427
|
+
mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], hash_including(
|
1428
|
+
skip_invalid_rows: false,
|
1429
|
+
ignore_unknown_values: false,
|
1430
|
+
)) { raise Fluent::BigQuery::Writer::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo")) }
|
1431
|
+
mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@fields))
|
1432
|
+
|
1105
1433
|
chunk = Fluent::MemoryBufferChunk.new("my.tag")
|
1106
1434
|
chunk << message.to_msgpack
|
1107
1435
|
|