embulk-output-bigquery 0.4.6 → 0.4.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +6 -0
- data/README.md +9 -0
- data/embulk-output-bigquery.gemspec +1 -1
- data/lib/embulk/output/bigquery.rb +12 -0
- data/lib/embulk/output/bigquery/bigquery_client.rb +29 -12
- data/lib/embulk/output/bigquery/helper.rb +1 -0
- data/test/test_configure.rb +15 -0
- data/test/test_helper.rb +2 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4ff7af5986004058e6bfc02cca1f00e9809f61bc
|
4
|
+
data.tar.gz: 243490b5b6468a9b204b9f7369eea55559fa08b0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ace47fce8c23201c9a9cfabfc8f35700c81495eecdf34e255fb463af89f053709a892b9ec0b1603f468d2a09d33b64773bc1ec0909de745efd020a4136504eb
|
7
|
+
data.tar.gz: df1cbc7852e15de10bfd7d9fb88cb4c288926438a64beb7d7f88ebc077c7accf8c02e57778329249192f03b5b5da428eb2380074be4fcafc4d0cdcf640baed5b
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## 0.4.7 - 2017-05-02
|
2
|
+
* [enhancement] Support location option to allow to use 'asia-northeast1' region
|
3
|
+
|
4
|
+
## 0.4.6 - 2017-04-17
|
5
|
+
* [enhancement] Support auth_method 'application_default'
|
6
|
+
|
1
7
|
## 0.4.5 - 2017-04-04
|
2
8
|
|
3
9
|
* [maintenance] Fix deprecated warning log condition for `timeout_sec`
|
data/README.md
CHANGED
@@ -44,6 +44,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
|
|
44
44
|
| json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
|
45
45
|
| project | string | required if json_keyfile is not given | | project_id |
|
46
46
|
| dataset | string | required | | dataset |
|
47
|
+
| location | string | optional | nil | geographic location of dataset. See [Location](#location) |
|
47
48
|
| table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
|
48
49
|
| auto_create_dataset | boolean | optional | false | automatically create dataset |
|
49
50
|
| auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
|
@@ -123,6 +124,14 @@ out:
|
|
123
124
|
source_format: NEWLINE_DELIMITED_JSON
|
124
125
|
```
|
125
126
|
|
127
|
+
### location
|
128
|
+
|
129
|
+
The geographic location of the dataset. Required except for US and EU.
|
130
|
+
|
131
|
+
`auto_create_table` isn't supported except for US and EU. And GCS bucket should be in same region when you use `gcs_bucket`.
|
132
|
+
|
133
|
+
See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
|
134
|
+
|
126
135
|
### mode
|
127
136
|
|
128
137
|
5 modes are provided.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-bigquery"
|
3
|
-
spec.version = "0.4.6"
|
3
|
+
spec.version = "0.4.7"
|
4
4
|
spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
|
5
5
|
spec.summary = "Google BigQuery output plugin for Embulk"
|
6
6
|
spec.description = "Embulk plugin that insert records to Google BigQuery."
|
@@ -39,6 +39,7 @@ module Embulk
|
|
39
39
|
'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
|
40
40
|
'project' => config.param('project', :string, :default => nil),
|
41
41
|
'dataset' => config.param('dataset', :string),
|
42
|
+
'location' => config.param('location', :string, :default => nil),
|
42
43
|
'table' => config.param('table', :string),
|
43
44
|
'dataset_old' => config.param('dataset_old', :string, :default => nil),
|
44
45
|
'table_old' => config.param('table_old', :string, :default => nil),
|
@@ -112,6 +113,17 @@ module Embulk
|
|
112
113
|
task['table_old'] ||= task['table']
|
113
114
|
end
|
114
115
|
|
116
|
+
unless task['location'].nil?
|
117
|
+
task['location'] = task['location'].downcase
|
118
|
+
# google-api-client doesn't support create bucket with region
|
119
|
+
# We need to use Cloud Storage Client Libraries to support it
|
120
|
+
if task['auto_create_gcs_bucket']
|
121
|
+
unless %w[us eu].include?(task['location'])
|
122
|
+
raise ConfigError.new "`auto_create_gcs_bucket` isn't supported excepts in us/eu"
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
115
127
|
if task['table_old']
|
116
128
|
task['table_old'] = now.strftime(task['table_old'])
|
117
129
|
end
|
@@ -19,6 +19,8 @@ module Embulk
|
|
19
19
|
reset_fields(fields) if fields
|
20
20
|
@project = @task['project']
|
21
21
|
@dataset = @task['dataset']
|
22
|
+
@location = @task['location']
|
23
|
+
@location_for_log = @location.nil? ? 'us/eu' : @location
|
22
24
|
|
23
25
|
@task['source_format'] ||= 'CSV'
|
24
26
|
@task['max_bad_records'] ||= 0
|
@@ -82,7 +84,7 @@ module Embulk
|
|
82
84
|
else
|
83
85
|
job_id = "embulk_load_job_#{SecureRandom.uuid}"
|
84
86
|
end
|
85
|
-
Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table}" }
|
87
|
+
Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
|
86
88
|
|
87
89
|
body = {
|
88
90
|
job_reference: {
|
@@ -110,6 +112,10 @@ module Embulk
|
|
110
112
|
}
|
111
113
|
}
|
112
114
|
}
|
115
|
+
|
116
|
+
if @location
|
117
|
+
body[:job_reference][:location] = @location
|
118
|
+
end
|
113
119
|
|
114
120
|
if @task['schema_update_options']
|
115
121
|
body[:configuration][:load][:schema_update_options] = @task['schema_update_options']
|
@@ -128,7 +134,7 @@ module Embulk
|
|
128
134
|
Embulk.logger.error {
|
129
135
|
"embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
|
130
136
|
}
|
131
|
-
raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table}, response:#{response}"
|
137
|
+
raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
|
132
138
|
end
|
133
139
|
end
|
134
140
|
end
|
@@ -173,7 +179,7 @@ module Embulk
|
|
173
179
|
else
|
174
180
|
job_id = "embulk_load_job_#{SecureRandom.uuid}"
|
175
181
|
end
|
176
|
-
Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table}" }
|
182
|
+
Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
|
177
183
|
else
|
178
184
|
Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
|
179
185
|
return
|
@@ -205,6 +211,10 @@ module Embulk
|
|
205
211
|
}
|
206
212
|
}
|
207
213
|
|
214
|
+
if @location
|
215
|
+
body[:job_reference][:location] = @location
|
216
|
+
end
|
217
|
+
|
208
218
|
if @task['schema_update_options']
|
209
219
|
body[:configuration][:load][:schema_update_options] = @task['schema_update_options']
|
210
220
|
end
|
@@ -230,7 +240,7 @@ module Embulk
|
|
230
240
|
Embulk.logger.error {
|
231
241
|
"embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
|
232
242
|
}
|
233
|
-
raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table}, response:#{response}"
|
243
|
+
raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
|
234
244
|
end
|
235
245
|
end
|
236
246
|
end
|
@@ -269,6 +279,10 @@ module Embulk
|
|
269
279
|
}
|
270
280
|
}
|
271
281
|
|
282
|
+
if @location
|
283
|
+
body[:job_reference][:location] = @location
|
284
|
+
end
|
285
|
+
|
272
286
|
opts = {}
|
273
287
|
Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
|
274
288
|
response = with_network_retry { client.insert_job(@project, body, opts) }
|
@@ -312,7 +326,7 @@ module Embulk
|
|
312
326
|
"job_id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
|
313
327
|
}
|
314
328
|
sleep wait_interval
|
315
|
-
_response = with_network_retry { client.get_job(@project, job_id) }
|
329
|
+
_response = with_network_retry { client.get_job(@project, job_id, location: @location) }
|
316
330
|
end
|
317
331
|
end
|
318
332
|
|
@@ -341,7 +355,7 @@ module Embulk
|
|
341
355
|
def create_dataset(dataset = nil, reference: nil)
|
342
356
|
dataset ||= @dataset
|
343
357
|
begin
|
344
|
-
Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset}" }
|
358
|
+
Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset} in #{@location_for_log}" }
|
345
359
|
hint = {}
|
346
360
|
if reference
|
347
361
|
response = get_dataset(reference)
|
@@ -353,8 +367,11 @@ module Embulk
|
|
353
367
|
dataset_id: dataset,
|
354
368
|
},
|
355
369
|
}.merge(hint)
|
370
|
+
if @location
|
371
|
+
body[:location] = @location
|
372
|
+
end
|
356
373
|
opts = {}
|
357
|
-
Embulk.logger.debug { "embulk-output-bigquery: insert_dataset(#{@project}, #{dataset}, #{body}, #{opts})" }
|
374
|
+
Embulk.logger.debug { "embulk-output-bigquery: insert_dataset(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
|
358
375
|
with_network_retry { client.insert_dataset(@project, body, opts) }
|
359
376
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
360
377
|
if e.status_code == 409 && /Already Exists:/ =~ e.message
|
@@ -366,14 +383,14 @@ module Embulk
|
|
366
383
|
Embulk.logger.error {
|
367
384
|
"embulk-output-bigquery: insert_dataset(#{@project}, #{body}, #{opts}), response:#{response}"
|
368
385
|
}
|
369
|
-
raise Error, "failed to create dataset #{@project}:#{dataset}, response:#{response}"
|
386
|
+
raise Error, "failed to create dataset #{@project}:#{dataset} in #{@location_for_log}, response:#{response}"
|
370
387
|
end
|
371
388
|
end
|
372
389
|
|
373
390
|
def get_dataset(dataset = nil)
|
374
391
|
dataset ||= @dataset
|
375
392
|
begin
|
376
|
-
Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{dataset}" }
|
393
|
+
Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{dataset}" }
|
377
394
|
with_network_retry { client.get_dataset(@project, dataset) }
|
378
395
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
379
396
|
if e.status_code == 404
|
@@ -416,7 +433,7 @@ module Embulk
|
|
416
433
|
end
|
417
434
|
|
418
435
|
opts = {}
|
419
|
-
Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts})" }
|
436
|
+
Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
|
420
437
|
with_network_retry { client.insert_table(@project, dataset, body, opts) }
|
421
438
|
rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
|
422
439
|
if e.status_code == 409 && /Already Exists:/ =~ e.message
|
@@ -426,9 +443,9 @@ module Embulk
|
|
426
443
|
|
427
444
|
response = {status_code: e.status_code, message: e.message, error_class: e.class}
|
428
445
|
Embulk.logger.error {
|
429
|
-
"embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts}), response:#{response}"
|
446
|
+
"embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
|
430
447
|
}
|
431
|
-
raise Error, "failed to create table #{@project}:#{dataset}.#{table}, response:#{response}"
|
448
|
+
raise Error, "failed to create table #{@project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
|
432
449
|
end
|
433
450
|
end
|
434
451
|
|
data/test/test_configure.rb
CHANGED
@@ -49,6 +49,7 @@ module Embulk
|
|
49
49
|
assert_equal nil, task['json_keyfile']
|
50
50
|
assert_equal "your_project_name", task['project']
|
51
51
|
assert_equal "your_dataset_name", task['dataset']
|
52
|
+
assert_equal nil, task['location']
|
52
53
|
assert_equal "your_table_name", task['table']
|
53
54
|
assert_equal nil, task['dataset_old']
|
54
55
|
assert_equal nil, task['table_old']
|
@@ -103,6 +104,20 @@ module Embulk
|
|
103
104
|
assert_raise { Bigquery.configure(config, schema, processor_count) }
|
104
105
|
end
|
105
106
|
|
107
|
+
def test_location
|
108
|
+
config = least_config.merge('location' => 'us')
|
109
|
+
assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
|
110
|
+
|
111
|
+
config = least_config.merge('location' => 'eu')
|
112
|
+
assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
|
113
|
+
|
114
|
+
config = least_config.merge('location' => 'asia-northeast1')
|
115
|
+
assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
|
116
|
+
|
117
|
+
config = least_config.merge('location' => 'asia-northeast1', 'auto_create_gcs_bucket' => true)
|
118
|
+
assert_raise { Bigquery.configure(config, schema, processor_count) }
|
119
|
+
end
|
120
|
+
|
106
121
|
def test_dataset_table_old
|
107
122
|
task = nil
|
108
123
|
config = least_config.merge('mode' => 'replace_backup', 'table_old' => 'backup')
|
data/test/test_helper.rb
CHANGED
@@ -94,6 +94,7 @@ module Embulk
|
|
94
94
|
def test_create_load_job_id
|
95
95
|
task = {
|
96
96
|
'dataset' => 'your_dataset_name',
|
97
|
+
'location' => 'asia-northeast1',
|
97
98
|
'table' => 'your_table_name',
|
98
99
|
'source_format' => 'CSV',
|
99
100
|
'max_bad_records' => nil,
|
@@ -108,6 +109,7 @@ module Embulk
|
|
108
109
|
File.write("tmp/your_file_name", "foobarbaz")
|
109
110
|
job_id = Helper.create_load_job_id(task, 'tmp/your_file_name', fields)
|
110
111
|
assert job_id.is_a?(String)
|
112
|
+
assert_equal 'embulk_load_job_2abaf528b69987db0224e52bbd1f0eec', job_id
|
111
113
|
end
|
112
114
|
end
|
113
115
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.6
|
4
|
+
version: 0.4.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-
|
12
|
+
date: 2018-05-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -175,7 +175,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
175
175
|
version: '0'
|
176
176
|
requirements: []
|
177
177
|
rubyforge_project:
|
178
|
-
rubygems_version: 2.6.
|
178
|
+
rubygems_version: 2.6.13
|
179
179
|
signing_key:
|
180
180
|
specification_version: 4
|
181
181
|
summary: Google BigQuery output plugin for Embulk
|