embulk-output-bigquery 0.4.14 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +28 -0
- data/README.md +74 -77
- data/embulk-output-bigquery.gemspec +10 -3
- data/lib/embulk/output/bigquery.rb +19 -49
- data/lib/embulk/output/bigquery/auth.rb +35 -0
- data/lib/embulk/output/bigquery/bigquery_client.rb +2 -11
- data/lib/embulk/output/bigquery/google_client.rb +3 -34
- data/lib/embulk/output/bigquery/value_converter_factory.rb +12 -0
- data/test/test_bigquery_client.rb +1 -5
- data/test/test_configure.rb +10 -19
- data/test/test_example.rb +0 -1
- data/test/test_helper.rb +4 -1
- data/test/test_transaction.rb +22 -62
- data/test/test_value_converter_factory.rb +42 -0
- metadata +29 -52
- data/example/config_append_direct_schema_update_options.yml +0 -31
- data/example/config_client_options.yml +0 -33
- data/example/config_csv.yml +0 -30
- data/example/config_delete_in_advance.yml +0 -29
- data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
- data/example/config_delete_in_advance_partitioned_table.yml +0 -33
- data/example/config_expose_errors.yml +0 -30
- data/example/config_gcs.yml +0 -32
- data/example/config_guess_from_embulk_schema.yml +0 -29
- data/example/config_guess_with_column_options.yml +0 -40
- data/example/config_gzip.yml +0 -1
- data/example/config_jsonl.yml +0 -1
- data/example/config_max_threads.yml +0 -34
- data/example/config_min_ouput_tasks.yml +0 -34
- data/example/config_mode_append.yml +0 -30
- data/example/config_mode_append_direct.yml +0 -30
- data/example/config_nested_record.yml +0 -1
- data/example/config_payload_column.yml +0 -20
- data/example/config_payload_column_index.yml +0 -20
- data/example/config_prevent_duplicate_insert.yml +0 -30
- data/example/config_progress_log_interval.yml +0 -31
- data/example/config_replace.yml +0 -30
- data/example/config_replace_backup.yml +0 -32
- data/example/config_replace_backup_field_partitioned_table.yml +0 -34
- data/example/config_replace_backup_partitioned_table.yml +0 -34
- data/example/config_replace_field_partitioned_table.yml +0 -33
- data/example/config_replace_partitioned_table.yml +0 -33
- data/example/config_replace_schema_update_options.yml +0 -33
- data/example/config_skip_file_generation.yml +0 -32
- data/example/config_table_strftime.yml +0 -30
- data/example/config_template_table.yml +0 -21
- data/example/config_uncompressed.yml +0 -1
- data/example/config_with_rehearsal.yml +0 -33
- data/example/example.csv +0 -17
- data/example/example.yml +0 -1
- data/example/example2_1.csv +0 -1
- data/example/example2_2.csv +0 -1
- data/example/example4_1.csv +0 -1
- data/example/example4_2.csv +0 -1
- data/example/example4_3.csv +0 -1
- data/example/example4_4.csv +0 -1
- data/example/json_key.json +0 -12
- data/example/nested_example.jsonl +0 -16
- data/example/schema.json +0 -30
- data/example/schema_expose_errors.json +0 -30
data/lib/embulk/output/bigquery/auth.rb ADDED

@@ -0,0 +1,35 @@
+require 'googleauth'
+
+module Embulk
+  module Output
+    class Bigquery < OutputPlugin
+      class Auth
+
+        attr_reader :auth_method, :json_key, :scope
+
+        def initialize(task, scope)
+          @auth_method = task['auth_method']
+          @json_key = task['json_keyfile']
+          @scope = scope
+        end
+
+        def authenticate
+          case auth_method
+          when 'authorized_user'
+            key = StringIO.new(json_key)
+            return Google::Auth::UserRefreshCredentials.make_creds(json_key_io: key, scope: scope)
+          when 'compute_engine'
+            return Google::Auth::GCECredentials.new
+          when 'service_account', 'json_key' # json_key is for backward compatibility
+            key = StringIO.new(json_key)
+            return Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
+          when 'application_default'
+            return Google::Auth.get_application_default([scope])
+          else
+            raise ConfigError.new("Unknown auth method: #{auth_method}")
+          end
+        end
+      end
+    end
+  end
+end
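The new Auth class centralizes credential handling for every auth_method value. A minimal usage sketch (the scope URL and task keys here are illustrative, and it assumes the plugin files are already loaded); google_client.rb wires it up the same way further down in this diff:

```ruby
# Hypothetical usage sketch of the Auth class added above.
task = {
  'auth_method'  => 'service_account',
  'json_keyfile' => File.read('/path/to/service_account.json'), # key content, not a path
}
scope = 'https://www.googleapis.com/auth/bigquery'

auth = Embulk::Output::Bigquery::Auth.new(task, scope)
credentials = auth.authenticate   # a googleauth credentials object
# client.authorization = credentials
```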
data/lib/embulk/output/bigquery/bigquery_client.rb CHANGED

@@ -79,11 +79,7 @@ module Embulk
         begin
           # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
           # we should generate job_id in client code, otherwise, retrying would cause duplication
-          if @task['prevent_duplicate_insert'] and (@task['mode'] == 'append' or @task['mode'] == 'append_direct')
-            job_id = Helper.create_load_job_id(@task, path, fields)
-          else
-            job_id = "embulk_load_job_#{SecureRandom.uuid}"
-          end
+          job_id = "embulk_load_job_#{SecureRandom.uuid}"
           Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }

           body = {
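This load path (and the nearly identical one in the next hunk) now always generates a fresh client-side job id; the prevent_duplicate_insert branch is gone. A trivial sketch of what the '+' line produces:

```ruby
require 'securerandom'

# Client-generated job id, as in the '+' lines above. BigQuery refuses to start a
# second job with an already-used job_id, which is what makes retrying this insert safe.
job_id = "embulk_load_job_#{SecureRandom.uuid}"
puts job_id # => e.g. "embulk_load_job_2f1c..."
```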
@@ -174,11 +170,7 @@ module Embulk
         if File.exist?(path)
           # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
           # we should generate job_id in client code, otherwise, retrying would cause duplication
-          if @task['prevent_duplicate_insert'] and (@task['mode'] == 'append' or @task['mode'] == 'append_direct')
-            job_id = Helper.create_load_job_id(@task, path, fields)
-          else
-            job_id = "embulk_load_job_#{SecureRandom.uuid}"
-          end
+          job_id = "embulk_load_job_#{SecureRandom.uuid}"
           Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
         else
           Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
@@ -437,7 +429,6 @@ module Embulk
             type: options['time_partitioning']['type'],
             expiration_ms: options['time_partitioning']['expiration_ms'],
             field: options['time_partitioning']['field'],
-            require_partition_filter: options['time_partitioning']['require_partition_filter'],
           }
         end

data/lib/embulk/output/bigquery/google_client.rb CHANGED

@@ -1,4 +1,4 @@
-require 'google/api_client/auth/key_utils'
+require_relative 'auth'

 module Embulk
   module Output
@@ -14,6 +14,7 @@ module Embulk
       def initialize(task, scope, client_class)
         @task = task
         @scope = scope
+        @auth = Auth.new(task, scope)
         @client_class = client_class
       end

@@ -37,39 +38,7 @@ module Embulk
         Embulk.logger.debug { "embulk-output-bigquery: client_options: #{client.client_options.to_h}" }
         Embulk.logger.debug { "embulk-output-bigquery: request_options: #{client.request_options.to_h}" }

-        case @task['auth_method']
-        when 'private_key'
-          private_key_passphrase = 'notasecret'
-          key = Google::APIClient::KeyUtils.load_from_pkcs12(@task['p12_keyfile'], private_key_passphrase)
-          auth = Signet::OAuth2::Client.new(
-            token_credential_uri: "https://accounts.google.com/o/oauth2/token",
-            audience: "https://accounts.google.com/o/oauth2/token",
-            scope: @scope,
-            issuer: @task['service_account_email'],
-            signing_key: key)
-
-        when 'compute_engine'
-          auth = Google::Auth::GCECredentials.new
-
-        when 'json_key'
-          json_key = @task['json_keyfile']
-          if File.exist?(json_key)
-            auth = File.open(json_key) do |f|
-              Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: @scope)
-            end
-          else
-            key = StringIO.new(json_key)
-            auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: @scope)
-          end
-
-        when 'application_default'
-          auth = Google::Auth.get_application_default([@scope])
-
-        else
-          raise ConfigError, "Unknown auth method: #{@task['auth_method']}"
-        end
-
-        client.authorization = auth
+        client.authorization = @auth.authenticate

         @cached_client_expiration = Time.now + 1800
         @cached_client = client
data/lib/embulk/output/bigquery/value_converter_factory.rb CHANGED

@@ -203,6 +203,13 @@ module Embulk
              val # Users must care of BQ timestamp format
            }
          end
+      when 'DATE'
+        Proc.new {|val|
+          next nil if val.nil?
+          with_typecast_error(val) do |val|
+            TimeWithZone.set_zone_offset(Time.parse(val), zone_offset).strftime("%Y-%m-%d")
+          end
+        }
       when 'RECORD'
         Proc.new {|val|
           next nil if val.nil?
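This is the string-column converter: a string value bound for a DATE column is parsed, interpreted in the configured zone offset, and reduced to a calendar date. A rough standalone sketch of what the new branch produces for a typical input (plain Ruby, without the plugin's TimeWithZone and with_typecast_error helpers):

```ruby
require 'time'

# Approximate, self-contained version of the new string -> DATE branch
# (the plugin additionally applies its configured zone offset and wraps
# parse failures in a typecast error).
val = "2016-09-29 23:30:00"
puts Time.parse(val).strftime("%Y-%m-%d") # => "2016-09-29"
```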
@@ -240,6 +247,11 @@ module Embulk
           next nil if val.nil?
           val.strftime("%Y-%m-%d %H:%M:%S.%6N %:z")
         }
+      when 'DATE'
+        Proc.new {|val|
+          next nil if val.nil?
+          val.localtime(zone_offset).strftime("%Y-%m-%d")
+        }
       else
         raise NotSupportedType, "cannot take column type #{type} for timestamp column"
       end
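And this is the timestamp-column counterpart: an Embulk timestamp (a Ruby Time) is shifted into the configured zone offset before the date is taken, so the calendar day reflects that zone rather than UTC. A small sketch of the same call, with an illustrative offset:

```ruby
require 'time'

# Sketch of the new timestamp -> DATE branch: the zone offset decides which
# calendar day a UTC instant falls on.
val = Time.parse("2016-09-29 23:30:00 UTC")
zone_offset = "+09:00" # illustrative; the plugin derives this from its timezone option
puts val.localtime(zone_offset).strftime("%Y-%m-%d") # => "2016-09-30"
```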
data/test/test_bigquery_client.rb CHANGED

@@ -32,7 +32,7 @@ else
       'dataset' => 'your_dataset_name',
       'table' => 'your_table_name',
       'auth_method' => 'json_key',
-      'json_keyfile' => JSON_KEYFILE,
+      'json_keyfile' => File.read(JSON_KEYFILE),
       'retries' => 3,
       'timeout_sec' => 300,
       'open_timeout_sec' => 300,
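The test now feeds the key file's contents rather than its path into json_keyfile, matching the new Auth class, which wraps the value in StringIO instead of opening a file itself. The same adjustment, sketched with an illustrative path and task hash (this mirrors only what the test change shows):

```ruby
# Before (0.4.x): tests handed the plugin a path; the old client opened the file.
task['json_keyfile'] = '/path/to/json_key.json'

# After (0.6.x): the value is treated as the key content itself.
task['json_keyfile'] = File.read('/path/to/json_key.json')
```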
@@ -61,10 +61,6 @@ else
     def test_json_keyfile
       assert_nothing_raised { BigqueryClient.new(least_task, schema).client }
     end
-
-    def test_p12_keyfile
-      # pending
-    end
   end

   sub_test_case "create_dataset" do
data/test/test_configure.rb CHANGED
@@ -18,10 +18,9 @@ module Embulk

     def least_config
       DataSource.new({
-        'project'     => 'your_project_name',
-        'dataset'     => 'your_dataset_name',
-        'table'       => 'your_table_name',
-        'p12_keyfile' => __FILE__, # fake
+        'project' => 'your_project_name',
+        'dataset' => 'your_dataset_name',
+        'table' => 'your_table_name',
       })
     end

@@ -43,9 +42,7 @@ module Embulk
     def test_configure_default
       task = Bigquery.configure(least_config, schema, processor_count)
       assert_equal "append", task['mode']
-      assert_equal "private_key", task['auth_method']
-      assert_equal nil, task['service_account_email']
-      assert_equal __FILE__, task['p12_keyfile']
+      assert_equal "application_default", task['auth_method']
       assert_equal nil, task['json_keyfile']
       assert_equal "your_project_name", task['project']
       assert_equal "your_dataset_name", task['dataset']
@@ -55,14 +52,13 @@ module Embulk
      assert_equal nil, task['table_old']
      assert_equal nil, task['table_name_old']
      assert_equal false, task['auto_create_dataset']
-      assert_equal false, task['auto_create_table']
+      assert_equal true, task['auto_create_table']
      assert_equal nil, task['schema_file']
      assert_equal nil, task['template_table']
      assert_equal true, task['delete_from_local_when_job_end']
      assert_equal 3600, task['job_status_max_polling_time']
      assert_equal 10, task['job_status_polling_interval']
      assert_equal false, task['is_skip_job_result_check']
-      assert_equal false, task['prevent_duplicate_insert']
      assert_equal false, task['with_rehearsal']
      assert_equal 1000, task['rehearsal_counts']
      assert_equal [], task['column_options']
@@ -133,11 +129,6 @@ module Embulk
      config = least_config.merge('auth_method' => 'foobar')
      assert_raise { Bigquery.configure(config, schema, processor_count) }

-      config = least_config.merge('auth_method' => 'private_key').tap {|h| h.delete('p12_keyfile') }
-      assert_raise { Bigquery.configure(config, schema, processor_count) }
-      config = least_config.merge('auth_method' => 'private_key', 'p12_keyfile' => 'dummy')
-      assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
-
      config = least_config.merge('auth_method' => 'json_key').tap {|h| h.delete('json_keyfile') }
      assert_raise { Bigquery.configure(config, schema, processor_count) }
      config = least_config.merge('auth_method' => 'json_key', 'json_keyfile' => "#{EXAMPLE_ROOT}/json_key.json")
@@ -162,22 +153,22 @@ module Embulk
     end

     def test_payload_column
-      config = least_config.merge('payload_column' => schema.first.name)
+      config = least_config.merge('payload_column' => schema.first.name, 'auto_create_table' => false, 'mode' => 'append_direct')
       task = Bigquery.configure(config, schema, processor_count)
       assert_equal task['payload_column_index'], 0

-      config = least_config.merge('payload_column' => 'not_exist')
+      config = least_config.merge('payload_column' => 'not_exist', 'auto_create_table' => false, 'mode' => 'append_direct')
       assert_raise { Bigquery.configure(config, schema, processor_count) }
     end

     def test_payload_column_index
-      config = least_config.merge('payload_column_index' => 0)
+      config = least_config.merge('payload_column_index' => 0, 'auto_create_table' => false, 'mode' => 'append_direct')
       assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }

-      config = least_config.merge('payload_column_index' => -1)
+      config = least_config.merge('payload_column_index' => -1, 'auto_create_table' => false, 'mode' => 'append_direct')
       assert_raise { Bigquery.configure(config, schema, processor_count) }

-      config = least_config.merge('payload_column_index' => schema.size)
+      config = least_config.merge('payload_column_index' => schema.size, 'auto_create_table' => false, 'mode' => 'append_direct')
       assert_raise { Bigquery.configure(config, schema, processor_count) }
     end

data/test/test_example.rb CHANGED
data/test/test_helper.rb CHANGED
@@ -62,7 +62,8 @@ module Embulk
        Column.new({index: 2, name: 'double', type: :double}),
        Column.new({index: 3, name: 'string', type: :string}),
        Column.new({index: 4, name: 'timestamp', type: :timestamp}),
-        Column.new({index: 5, name: 'json', type: :json}),
+        Column.new({index: 5, name: 'date', type: :timestamp}),
+        Column.new({index: 6, name: 'json', type: :json}),
      ])
      task = {
        'column_options' => [
@@ -71,6 +72,7 @@ module Embulk
          {'name' => 'double', 'type' => 'STRING'},
          {'name' => 'string', 'type' => 'INTEGER'},
          {'name' => 'timestamp', 'type' => 'INTEGER'},
+          {'name' => 'date', 'type' => 'DATE'},
          {'name' => 'json', 'type' => 'RECORD', 'fields' => [
            { 'name' => 'key1', 'type' => 'STRING' },
          ]},
@@ -82,6 +84,7 @@ module Embulk
        {name: 'double', type: 'STRING'},
        {name: 'string', type: 'INTEGER'},
        {name: 'timestamp', type: 'INTEGER'},
+        {name: 'date', type: 'DATE'},
        {name: 'json', type: 'RECORD', fields: [
          {name: 'key1', type: 'STRING'},
        ]},
data/test/test_transaction.rb CHANGED
@@ -8,12 +8,11 @@ module Embulk
   class TestTransaction < Test::Unit::TestCase
     def least_config
       DataSource.new({
-        'project'     => 'your_project_name',
-        'dataset'     => 'your_dataset_name',
-        'table'       => 'your_table_name',
-        'p12_keyfile' => __FILE__, # fake
-        'temp_table'  => 'temp_table', # randomly created is not good for our test
-        'path_prefix' => 'tmp/', # randomly created is not good for our test
+        'project' => 'your_project_name',
+        'dataset' => 'your_dataset_name',
+        'table' => 'your_table_name',
+        'temp_table' => 'temp_table', # randomly created is not good for our test
+        'path_prefix' => 'tmp/', # randomly created is not good for our test
       })
     end

@@ -41,8 +40,8 @@ module Embulk
     end

     sub_test_case "append_direct" do
-      def test_append_direct
-        config = least_config.merge('mode' => 'append_direct')
+      def test_append_direc_without_auto_create
+        config = least_config.merge('mode' => 'append_direct', 'auto_create_dataset' => false, 'auto_create_table' => false)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_table(config['table'])
@@ -60,8 +59,8 @@ module Embulk
        Bigquery.transaction(config, schema, processor_count, &control)
      end

-      def test_append_direct_with_partition
-        config = least_config.merge('mode' => 'append_direct', 'table' => 'table$20160929')
+      def test_append_direct_with_partition_without_auto_create
+        config = least_config.merge('mode' => 'append_direct', 'table' => 'table$20160929', 'auto_create_dataset' => false, 'auto_create_table' => false)
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).get_dataset(config['dataset'])
          mock(obj).get_table(config['table'])
@@ -86,7 +85,7 @@ module Embulk
        task = Bigquery.configure(config, schema, processor_count)
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).get_dataset(config['dataset'])
-          mock(obj).delete_table(config['table'])
+          mock(obj).delete_table_or_partition(config['table'])
          mock(obj).create_table_if_not_exists(config['table'])
        end
        Bigquery.transaction(config, schema, processor_count, &control)
@@ -97,7 +96,7 @@ module Embulk
        task = Bigquery.configure(config, schema, processor_count)
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).get_dataset(config['dataset'])
-          mock(obj).delete_partition(config['table'])
+          mock(obj).delete_table_or_partition(config['table'])
          mock(obj).create_table_if_not_exists(config['table'])
        end
        Bigquery.transaction(config, schema, processor_count, &control)
@@ -111,6 +110,7 @@ module Embulk
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).get_dataset(config['dataset'])
          mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table'])
          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
          mock(obj).delete_table(config['temp_table'])
        end
@@ -120,19 +120,6 @@ module Embulk
      def test_replace_with_partitioning
        config = least_config.merge('mode' => 'replace', 'table' => 'table$20160929')
        task = Bigquery.configure(config, schema, processor_count)
-        any_instance_of(BigqueryClient) do |obj|
-          mock(obj).get_dataset(config['dataset'])
-          mock(obj).create_table_if_not_exists(config['temp_table'])
-          mock(obj).get_table(config['table'])
-          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
-          mock(obj).delete_table(config['temp_table'])
-        end
-        Bigquery.transaction(config, schema, processor_count, &control)
-      end
-
-      def test_replace_with_partitioning_with_auto_create_table
-        config = least_config.merge('mode' => 'replace', 'table' => 'table$20160929', 'auto_create_table' => true)
-        task = Bigquery.configure(config, schema, processor_count)
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).get_dataset(config['dataset'])
          mock(obj).create_table_if_not_exists(config['temp_table'])
@@ -152,8 +139,10 @@ module Embulk
          mock(obj).get_dataset(config['dataset'])
          mock(obj).get_dataset(config['dataset_old'])
          mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table'])
+          mock(obj).create_table_if_not_exists(config['table_old'], dataset: config['dataset_old'])

-          mock(obj).get_table_or_partition(task['table'])
+          mock(obj).get_table_or_partition(config['table'])
          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])

          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -168,9 +157,11 @@ module Embulk
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).create_dataset(config['dataset'])
          mock(obj).create_dataset(config['dataset_old'], reference: config['dataset'])
+          mock(obj).create_table_if_not_exists(config['table'])
          mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table_old'], dataset: config['dataset_old'])

-          mock(obj).get_table_or_partition(task['table'])
+          mock(obj).get_table_or_partition(config['table'])
          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])

          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -180,35 +171,16 @@ module Embulk
      end

      def test_replace_backup_with_partitioning
-        config = least_config.merge('mode' => 'replace_backup', 'table' => 'table$20160929', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old$20190929', 'temp_table' => 'temp_table')
-        task = Bigquery.configure(config, schema, processor_count)
-        any_instance_of(BigqueryClient) do |obj|
-          mock(obj).get_dataset(config['dataset'])
-          mock(obj).get_dataset(config['dataset_old'])
-          mock(obj).create_table_if_not_exists(config['temp_table'])
-          mock(obj).get_table(task['table'])
-          mock(obj).get_table(task['table_old'], dataset: config['dataset_old'])
-
-          mock(obj).get_table_or_partition(task['table'])
-          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
-
-          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
-          mock(obj).delete_table(config['temp_table'])
-        end
-        Bigquery.transaction(config, schema, processor_count, &control)
-      end
-
-      def test_replace_backup_with_partitioning_auto_create_table
        config = least_config.merge('mode' => 'replace_backup', 'table' => 'table$20160929', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old$20160929', 'temp_table' => 'temp_table', 'auto_create_table' => true)
        task = Bigquery.configure(config, schema, processor_count)
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).get_dataset(config['dataset'])
          mock(obj).get_dataset(config['dataset_old'])
          mock(obj).create_table_if_not_exists(config['temp_table'])
-          mock(obj).create_table_if_not_exists(task['table'])
-          mock(obj).create_table_if_not_exists(task['table_old'], dataset: config['dataset_old'])
+          mock(obj).create_table_if_not_exists(config['table'])
+          mock(obj).create_table_if_not_exists(config['table_old'], dataset: config['dataset_old'])

-          mock(obj).get_table_or_partition(task['table'])
+          mock(obj).get_table_or_partition(config['table'])
          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])

          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -225,6 +197,7 @@ module Embulk
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).get_dataset(config['dataset'])
          mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table'])
          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
          mock(obj).delete_table(config['temp_table'])
        end
@@ -232,19 +205,6 @@ module Embulk
      end

      def test_append_with_partitioning
-        config = least_config.merge('mode' => 'append', 'table' => 'table$20160929')
-        task = Bigquery.configure(config, schema, processor_count)
-        any_instance_of(BigqueryClient) do |obj|
-          mock(obj).get_dataset(config['dataset'])
-          mock(obj).create_table_if_not_exists(config['temp_table'])
-          mock(obj).get_table(config['table'])
-          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
-          mock(obj).delete_table(config['temp_table'])
-        end
-        Bigquery.transaction(config, schema, processor_count, &control)
-      end
-
-      def test_append_with_partitioning_with_auto_create_table
        config = least_config.merge('mode' => 'append', 'table' => 'table$20160929', 'auto_create_table' => true)
        task = Bigquery.configure(config, schema, processor_count)
        any_instance_of(BigqueryClient) do |obj|