embulk-output-bigquery 0.4.14 → 0.6.3
- checksums.yaml +5 -5
- data/CHANGELOG.md +28 -0
- data/README.md +74 -77
- data/embulk-output-bigquery.gemspec +10 -3
- data/lib/embulk/output/bigquery.rb +19 -49
- data/lib/embulk/output/bigquery/auth.rb +35 -0
- data/lib/embulk/output/bigquery/bigquery_client.rb +2 -11
- data/lib/embulk/output/bigquery/google_client.rb +3 -34
- data/lib/embulk/output/bigquery/value_converter_factory.rb +12 -0
- data/test/test_bigquery_client.rb +1 -5
- data/test/test_configure.rb +10 -19
- data/test/test_example.rb +0 -1
- data/test/test_helper.rb +4 -1
- data/test/test_transaction.rb +22 -62
- data/test/test_value_converter_factory.rb +42 -0
- metadata +29 -52
- data/example/config_append_direct_schema_update_options.yml +0 -31
- data/example/config_client_options.yml +0 -33
- data/example/config_csv.yml +0 -30
- data/example/config_delete_in_advance.yml +0 -29
- data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
- data/example/config_delete_in_advance_partitioned_table.yml +0 -33
- data/example/config_expose_errors.yml +0 -30
- data/example/config_gcs.yml +0 -32
- data/example/config_guess_from_embulk_schema.yml +0 -29
- data/example/config_guess_with_column_options.yml +0 -40
- data/example/config_gzip.yml +0 -1
- data/example/config_jsonl.yml +0 -1
- data/example/config_max_threads.yml +0 -34
- data/example/config_min_ouput_tasks.yml +0 -34
- data/example/config_mode_append.yml +0 -30
- data/example/config_mode_append_direct.yml +0 -30
- data/example/config_nested_record.yml +0 -1
- data/example/config_payload_column.yml +0 -20
- data/example/config_payload_column_index.yml +0 -20
- data/example/config_prevent_duplicate_insert.yml +0 -30
- data/example/config_progress_log_interval.yml +0 -31
- data/example/config_replace.yml +0 -30
- data/example/config_replace_backup.yml +0 -32
- data/example/config_replace_backup_field_partitioned_table.yml +0 -34
- data/example/config_replace_backup_partitioned_table.yml +0 -34
- data/example/config_replace_field_partitioned_table.yml +0 -33
- data/example/config_replace_partitioned_table.yml +0 -33
- data/example/config_replace_schema_update_options.yml +0 -33
- data/example/config_skip_file_generation.yml +0 -32
- data/example/config_table_strftime.yml +0 -30
- data/example/config_template_table.yml +0 -21
- data/example/config_uncompressed.yml +0 -1
- data/example/config_with_rehearsal.yml +0 -33
- data/example/example.csv +0 -17
- data/example/example.yml +0 -1
- data/example/example2_1.csv +0 -1
- data/example/example2_2.csv +0 -1
- data/example/example4_1.csv +0 -1
- data/example/example4_2.csv +0 -1
- data/example/example4_3.csv +0 -1
- data/example/example4_4.csv +0 -1
- data/example/json_key.json +0 -12
- data/example/nested_example.jsonl +0 -16
- data/example/schema.json +0 -30
- data/example/schema_expose_errors.json +0 -30
data/lib/embulk/output/bigquery/auth.rb ADDED
@@ -0,0 +1,35 @@
+require 'googleauth'
+
+module Embulk
+  module Output
+    class Bigquery < OutputPlugin
+      class Auth
+
+        attr_reader :auth_method, :json_key, :scope
+
+        def initialize(task, scope)
+          @auth_method = task['auth_method']
+          @json_key = task['json_keyfile']
+          @scope = scope
+        end
+
+        def authenticate
+          case auth_method
+          when 'authorized_user'
+            key = StringIO.new(json_key)
+            return Google::Auth::UserRefreshCredentials.make_creds(json_key_io: key, scope: scope)
+          when 'compute_engine'
+            return Google::Auth::GCECredentials.new
+          when 'service_account', 'json_key' # json_key is for backward compatibility
+            key = StringIO.new(json_key)
+            return Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
+          when 'application_default'
+            return Google::Auth.get_application_default([scope])
+          else
+            raise ConfigError.new("Unknown auth method: #{auth_method}")
+          end
+        end
+      end
+    end
+  end
+end
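The new Auth class consolidates every supported auth_method in one place. A minimal usage sketch, assuming the plugin and googleauth are loaded (the service-account path below is illustrative); note that json_keyfile now carries the key content rather than a file path:

    require 'stringio'

    task = {
      'auth_method'  => 'service_account',
      'json_keyfile' => File.read('/path/to/service_account.json'), # key content, not a path
    }
    scope = 'https://www.googleapis.com/auth/bigquery'
    auth = Embulk::Output::Bigquery::Auth.new(task, scope)
    credentials = auth.authenticate # a credentials object assignable to client.authorization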
data/lib/embulk/output/bigquery/bigquery_client.rb CHANGED
@@ -79,11 +79,7 @@ module Embulk
         begin
           # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
           # we should generate job_id in client code, otherwise, retrying would cause duplication
-          if @task['prevent_duplicate_insert'] and (@task['mode'] == 'append' or @task['mode'] == 'append_direct')
-            job_id = Helper.create_load_job_id(@task, path, fields)
-          else
-            job_id = "embulk_load_job_#{SecureRandom.uuid}"
-          end
+          job_id = "embulk_load_job_#{SecureRandom.uuid}"
           Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
 
           body = {
@@ -174,11 +170,7 @@ module Embulk
           if File.exist?(path)
             # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
             # we should generate job_id in client code, otherwise, retrying would cause duplication
-            if @task['prevent_duplicate_insert'] and (@task['mode'] == 'append' or @task['mode'] == 'append_direct')
-              job_id = Helper.create_load_job_id(@task, path, fields)
-            else
-              job_id = "embulk_load_job_#{SecureRandom.uuid}"
-            end
+            job_id = "embulk_load_job_#{SecureRandom.uuid}"
             Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
           else
             Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
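Both load paths now unconditionally generate a random job_id; the branch that derived a deterministic id via Helper.create_load_job_id (tied to the removed prevent_duplicate_insert option) is gone. A sketch of the difference, with the digest recipe for the old id being a hypothetical reconstruction:

    require 'securerandom'
    require 'digest/md5'

    path   = 'tmp/your_file_prefix.0.csv'  # illustrative
    fields = ['id:INTEGER', 'name:STRING'] # illustrative

    # 0.6.x: every attempt gets a fresh id, so a retried load is a new job.
    job_id = "embulk_load_job_#{SecureRandom.uuid}"

    # 0.4.x (removed): a content-derived id let BigQuery treat a retry as the
    # same job, preventing duplicate inserts. Hypothetical reconstruction:
    old_job_id = "embulk_load_job_#{Digest::MD5.hexdigest(path + fields.join(','))}"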
@@ -437,7 +429,6 @@ module Embulk
               type: options['time_partitioning']['type'],
               expiration_ms: options['time_partitioning']['expiration_ms'],
               field: options['time_partitioning']['field'],
-              require_partition_filter: options['time_partitioning']['require_partition_filter'],
             }
           end
 
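For reference, the time_partitioning hash built here now carries only these keys (values illustrative); require_partition_filter is no longer forwarded to the table body:

    time_partitioning = {
      type:          'DAY',                    # partition granularity
      expiration_ms: 90 * 24 * 60 * 60 * 1000, # optional: expire partitions after 90 days
      field:         'created_at',             # optional: column to partition on
    }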
data/lib/embulk/output/bigquery/google_client.rb CHANGED
@@ -1,4 +1,4 @@
-require 'google/api_client/auth/key_utils'
+require_relative 'auth'
 
 module Embulk
   module Output
@@ -14,6 +14,7 @@ module Embulk
       def initialize(task, scope, client_class)
         @task = task
         @scope = scope
+        @auth = Auth.new(task, scope)
         @client_class = client_class
       end
 
@@ -37,39 +38,7 @@ module Embulk
         Embulk.logger.debug { "embulk-output-bigquery: client_options: #{client.client_options.to_h}" }
         Embulk.logger.debug { "embulk-output-bigquery: request_options: #{client.request_options.to_h}" }
 
-        case @task['auth_method']
-        when 'private_key'
-          private_key_passphrase = 'notasecret'
-          key = Google::APIClient::KeyUtils.load_from_pkcs12(@task['p12_keyfile'], private_key_passphrase)
-          auth = Signet::OAuth2::Client.new(
-            token_credential_uri: "https://accounts.google.com/o/oauth2/token",
-            audience: "https://accounts.google.com/o/oauth2/token",
-            scope: @scope,
-            issuer: @task['service_account_email'],
-            signing_key: key)
-
-        when 'compute_engine'
-          auth = Google::Auth::GCECredentials.new
-
-        when 'json_key'
-          json_key = @task['json_keyfile']
-          if File.exist?(json_key)
-            auth = File.open(json_key) do |f|
-              Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: @scope)
-            end
-          else
-            key = StringIO.new(json_key)
-            auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: @scope)
-          end
-
-        when 'application_default'
-          auth = Google::Auth.get_application_default([@scope])
-
-        else
-          raise ConfigError, "Unknown auth method: #{@task['auth_method']}"
-        end
-
-        client.authorization = auth
+        client.authorization = @auth.authenticate
 
         @cached_client_expiration = Time.now + 1800
         @cached_client = client
data/lib/embulk/output/bigquery/value_converter_factory.rb CHANGED
@@ -203,6 +203,13 @@ module Embulk
             val # Users must care of BQ timestamp format
           }
         end
+      when 'DATE'
+        Proc.new {|val|
+          next nil if val.nil?
+          with_typecast_error(val) do |val|
+            TimeWithZone.set_zone_offset(Time.parse(val), zone_offset).strftime("%Y-%m-%d")
+          end
+        }
       when 'RECORD'
         Proc.new {|val|
           next nil if val.nil?
@@ -240,6 +247,11 @@ module Embulk
           next nil if val.nil?
           val.strftime("%Y-%m-%d %H:%M:%S.%6N %:z")
         }
+      when 'DATE'
+        Proc.new {|val|
+          next nil if val.nil?
+          val.localtime(zone_offset).strftime("%Y-%m-%d")
+        }
       else
         raise NotSupportedType, "cannot take column type #{type} for timestamp column"
       end
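A standalone sketch of what the two new DATE converters produce, using plain Time calls in place of the plugin's TimeWithZone helper (values illustrative):

    require 'time'

    # String input (string converter): parse, shift into the configured zone, format.
    Time.parse('2020-05-01 23:30:00 UTC').localtime('+09:00').strftime('%Y-%m-%d')
    # => "2020-05-02"

    # Time input (timestamp converter): the value is already a Time object.
    Time.at(1588375800).localtime('+09:00').strftime('%Y-%m-%d')
    # => "2020-05-02"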
data/test/test_bigquery_client.rb CHANGED
@@ -32,7 +32,7 @@ else
       'dataset' => 'your_dataset_name',
       'table' => 'your_table_name',
       'auth_method' => 'json_key',
-      'json_keyfile' => JSON_KEYFILE,
+      'json_keyfile' => File.read(JSON_KEYFILE),
       'retries' => 3,
       'timeout_sec' => 300,
       'open_timeout_sec' => 300,
@@ -61,10 +61,6 @@ else
     def test_json_keyfile
       assert_nothing_raised { BigqueryClient.new(least_task, schema).client }
     end
-
-    def test_p12_keyfile
-      # pending
-    end
   end
 
   sub_test_case "create_dataset" do
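The changed setup line reflects the Auth class contract: json_keyfile is now the key content, so the test reads the file itself. A task built in Ruby would do the same (sketch, using the test's own JSON_KEYFILE constant):

    least_task['json_keyfile'] = File.read(JSON_KEYFILE) # pass the key content, not the path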
data/test/test_configure.rb CHANGED
@@ -18,10 +18,9 @@ module Embulk
 
     def least_config
       DataSource.new({
-        'project'     => 'your_project_name',
-        'dataset'     => 'your_dataset_name',
-        'table'       => 'your_table_name',
-        'p12_keyfile' => __FILE__, # fake
+        'project' => 'your_project_name',
+        'dataset' => 'your_dataset_name',
+        'table' => 'your_table_name',
       })
     end
 
@@ -43,9 +42,7 @@ module Embulk
     def test_configure_default
       task = Bigquery.configure(least_config, schema, processor_count)
       assert_equal "append", task['mode']
-      assert_equal "private_key", task['auth_method']
-      assert_equal nil, task['service_account_email']
-      assert_equal __FILE__, task['p12_keyfile']
+      assert_equal "application_default", task['auth_method']
       assert_equal nil, task['json_keyfile']
       assert_equal "your_project_name", task['project']
       assert_equal "your_dataset_name", task['dataset']
@@ -55,14 +52,13 @@ module Embulk
       assert_equal nil, task['table_old']
       assert_equal nil, task['table_name_old']
       assert_equal false, task['auto_create_dataset']
-      assert_equal false, task['auto_create_table']
+      assert_equal true, task['auto_create_table']
       assert_equal nil, task['schema_file']
       assert_equal nil, task['template_table']
       assert_equal true, task['delete_from_local_when_job_end']
       assert_equal 3600, task['job_status_max_polling_time']
       assert_equal 10, task['job_status_polling_interval']
       assert_equal false, task['is_skip_job_result_check']
-      assert_equal false, task['prevent_duplicate_insert']
       assert_equal false, task['with_rehearsal']
       assert_equal 1000, task['rehearsal_counts']
       assert_equal [], task['column_options']
@@ -133,11 +129,6 @@ module Embulk
       config = least_config.merge('auth_method' => 'foobar')
       assert_raise { Bigquery.configure(config, schema, processor_count) }
 
-      config = least_config.merge('auth_method' => 'private_key').tap {|h| h.delete('p12_keyfile') }
-      assert_raise { Bigquery.configure(config, schema, processor_count) }
-      config = least_config.merge('auth_method' => 'private_key', 'p12_keyfile' => 'dummy')
-      assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
-
       config = least_config.merge('auth_method' => 'json_key').tap {|h| h.delete('json_keyfile') }
       assert_raise { Bigquery.configure(config, schema, processor_count) }
       config = least_config.merge('auth_method' => 'json_key', 'json_keyfile' => "#{EXAMPLE_ROOT}/json_key.json")
@@ -162,22 +153,22 @@ module Embulk
     end
 
     def test_payload_column
-      config = least_config.merge('payload_column' => schema.first.name)
+      config = least_config.merge('payload_column' => schema.first.name, 'auto_create_table' => false, 'mode' => 'append_direct')
       task = Bigquery.configure(config, schema, processor_count)
       assert_equal task['payload_column_index'], 0
 
-      config = least_config.merge('payload_column' => 'not_exist')
+      config = least_config.merge('payload_column' => 'not_exist', 'auto_create_table' => false, 'mode' => 'append_direct')
       assert_raise { Bigquery.configure(config, schema, processor_count) }
     end
 
     def test_payload_column_index
-      config = least_config.merge('payload_column_index' => 0)
+      config = least_config.merge('payload_column_index' => 0, 'auto_create_table' => false, 'mode' => 'append_direct')
       assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
 
-      config = least_config.merge('payload_column_index' => -1)
+      config = least_config.merge('payload_column_index' => -1, 'auto_create_table' => false, 'mode' => 'append_direct')
       assert_raise { Bigquery.configure(config, schema, processor_count) }
 
-      config = least_config.merge('payload_column_index' => schema.size)
+      config = least_config.merge('payload_column_index' => schema.size, 'auto_create_table' => false, 'mode' => 'append_direct')
       assert_raise { Bigquery.configure(config, schema, processor_count) }
     end
 
data/test/test_example.rb CHANGED
data/test/test_helper.rb CHANGED
@@ -62,7 +62,8 @@ module Embulk
         Column.new({index: 2, name: 'double', type: :double}),
         Column.new({index: 3, name: 'string', type: :string}),
         Column.new({index: 4, name: 'timestamp', type: :timestamp}),
-        Column.new({index: 5, name: 'json', type: :json}),
+        Column.new({index: 5, name: 'date', type: :timestamp}),
+        Column.new({index: 6, name: 'json', type: :json}),
       ])
       task = {
         'column_options' => [
@@ -71,6 +72,7 @@ module Embulk
           {'name' => 'double', 'type' => 'STRING'},
           {'name' => 'string', 'type' => 'INTEGER'},
           {'name' => 'timestamp', 'type' => 'INTEGER'},
+          {'name' => 'date', 'type' => 'DATE'},
           {'name' => 'json', 'type' => 'RECORD', 'fields' => [
             { 'name' => 'key1', 'type' => 'STRING' },
           ]},
@@ -82,6 +84,7 @@ module Embulk
           {name: 'double', type: 'STRING'},
           {name: 'string', type: 'INTEGER'},
           {name: 'timestamp', type: 'INTEGER'},
+          {name: 'date', type: 'DATE'},
           {name: 'json', type: 'RECORD', fields: [
             {name: 'key1', type: 'STRING'},
           ]},
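The extended fixture exercises the user-facing side of the DATE support: a timestamp (or string) input column can now be mapped to a BigQuery DATE. A minimal column_options entry mirroring the fixture (sketch):

    'column_options' => [
      {'name' => 'date', 'type' => 'DATE'}, # store the 'date' input column as a BigQuery DATE
    ]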
data/test/test_transaction.rb CHANGED
@@ -8,12 +8,11 @@ module Embulk
   class TestTransaction < Test::Unit::TestCase
     def least_config
       DataSource.new({
-        'project'     => 'your_project_name',
-        'dataset'     => 'your_dataset_name',
-        'table'       => 'your_table_name',
-        'p12_keyfile' => __FILE__, # fake
-        'temp_table'  => 'temp_table', # randomly created is not good for our test
-        'path_prefix' => 'tmp/', # randomly created is not good for our test
+        'project' => 'your_project_name',
+        'dataset' => 'your_dataset_name',
+        'table' => 'your_table_name',
+        'temp_table' => 'temp_table', # randomly created is not good for our test
+        'path_prefix' => 'tmp/', # randomly created is not good for our test
       })
     end
 
@@ -41,8 +40,8 @@ module Embulk
     end
 
     sub_test_case "append_direct" do
-      def test_append_direct
-        config = least_config.merge('mode' => 'append_direct')
+      def test_append_direc_without_auto_create
+        config = least_config.merge('mode' => 'append_direct', 'auto_create_dataset' => false, 'auto_create_table' => false)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_table(config['table'])
@@ -60,8 +59,8 @@ module Embulk
         Bigquery.transaction(config, schema, processor_count, &control)
       end
 
-      def test_append_direct_with_partition
-        config = least_config.merge('mode' => 'append_direct', 'table' => 'table$20160929')
+      def test_append_direct_with_partition_without_auto_create
+        config = least_config.merge('mode' => 'append_direct', 'table' => 'table$20160929', 'auto_create_dataset' => false, 'auto_create_table' => false)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_table(config['table'])
@@ -86,7 +85,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).delete_table(config['table'])
+          mock(obj).delete_table_or_partition(config['table'])
           mock(obj).create_table_if_not_exists(config['table'])
         end
         Bigquery.transaction(config, schema, processor_count, &control)
@@ -97,7 +96,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).delete_partition(config['table'])
+          mock(obj).delete_table_or_partition(config['table'])
           mock(obj).create_table_if_not_exists(config['table'])
         end
         Bigquery.transaction(config, schema, processor_count, &control)
@@ -111,6 +110,7 @@ module Embulk
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
           mock(obj).delete_table(config['temp_table'])
         end
@@ -120,19 +120,6 @@ module Embulk
     def test_replace_with_partitioning
       config = least_config.merge('mode' => 'replace', 'table' => 'table$20160929')
       task = Bigquery.configure(config, schema, processor_count)
-      any_instance_of(BigqueryClient) do |obj|
-        mock(obj).get_dataset(config['dataset'])
-        mock(obj).create_table_if_not_exists(config['temp_table'])
-        mock(obj).get_table(config['table'])
-        mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
-        mock(obj).delete_table(config['temp_table'])
-      end
-      Bigquery.transaction(config, schema, processor_count, &control)
-    end
-
-    def test_replace_with_partitioning_with_auto_create_table
-      config = least_config.merge('mode' => 'replace', 'table' => 'table$20160929', 'auto_create_table' => true)
-      task = Bigquery.configure(config, schema, processor_count)
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
         mock(obj).create_table_if_not_exists(config['temp_table'])
@@ -152,8 +139,10 @@ module Embulk
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_dataset(config['dataset_old'])
           mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table'])
+          mock(obj).create_table_if_not_exists(config['table_old'], dataset: config['dataset_old'])
 
-          mock(obj).get_table_or_partition(task['table'])
+          mock(obj).get_table_or_partition(config['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -168,9 +157,11 @@ module Embulk
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).create_dataset(config['dataset'])
           mock(obj).create_dataset(config['dataset_old'], reference: config['dataset'])
+          mock(obj).create_table_if_not_exists(config['table'])
           mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table_old'], dataset: config['dataset_old'])
 
-          mock(obj).get_table_or_partition(task['table'])
+          mock(obj).get_table_or_partition(config['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -180,35 +171,16 @@ module Embulk
       end
 
       def test_replace_backup_with_partitioning
-        config = least_config.merge('mode' => 'replace_backup', 'table' => 'table$20160929', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old$20190929', 'temp_table' => 'temp_table')
-        task = Bigquery.configure(config, schema, processor_count)
-        any_instance_of(BigqueryClient) do |obj|
-          mock(obj).get_dataset(config['dataset'])
-          mock(obj).get_dataset(config['dataset_old'])
-          mock(obj).create_table_if_not_exists(config['temp_table'])
-          mock(obj).get_table(task['table'])
-          mock(obj).get_table(task['table_old'], dataset: config['dataset_old'])
-
-          mock(obj).get_table_or_partition(task['table'])
-          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
-
-          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
-          mock(obj).delete_table(config['temp_table'])
-        end
-        Bigquery.transaction(config, schema, processor_count, &control)
-      end
-
-      def test_replace_backup_with_partitioning_auto_create_table
         config = least_config.merge('mode' => 'replace_backup', 'table' => 'table$20160929', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old$20160929', 'temp_table' => 'temp_table', 'auto_create_table' => true)
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_dataset(config['dataset_old'])
           mock(obj).create_table_if_not_exists(config['temp_table'])
-          mock(obj).create_table_if_not_exists(task['table'])
-          mock(obj).create_table_if_not_exists(task['table_old'], dataset: config['dataset_old'])
+          mock(obj).create_table_if_not_exists(config['table'])
+          mock(obj).create_table_if_not_exists(config['table_old'], dataset: config['dataset_old'])
 
-          mock(obj).get_table_or_partition(task['table'])
+          mock(obj).get_table_or_partition(config['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -225,6 +197,7 @@ module Embulk
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table'])
          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
           mock(obj).delete_table(config['temp_table'])
         end
@@ -232,19 +205,6 @@ module Embulk
       end
 
       def test_append_with_partitioning
-        config = least_config.merge('mode' => 'append', 'table' => 'table$20160929')
-        task = Bigquery.configure(config, schema, processor_count)
-        any_instance_of(BigqueryClient) do |obj|
-          mock(obj).get_dataset(config['dataset'])
-          mock(obj).create_table_if_not_exists(config['temp_table'])
-          mock(obj).get_table(config['table'])
-          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
-          mock(obj).delete_table(config['temp_table'])
-        end
-        Bigquery.transaction(config, schema, processor_count, &control)
-      end
-
-      def test_append_with_partitioning_with_auto_create_table
         config = least_config.merge('mode' => 'append', 'table' => 'table$20160929', 'auto_create_table' => true)
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|