embulk-output-bigquery 0.4.6 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +6 -0
- data/README.md +9 -0
- data/embulk-output-bigquery.gemspec +1 -1
- data/lib/embulk/output/bigquery.rb +12 -0
- data/lib/embulk/output/bigquery/bigquery_client.rb +29 -12
- data/lib/embulk/output/bigquery/helper.rb +1 -0
- data/test/test_configure.rb +15 -0
- data/test/test_helper.rb +2 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz:
-  data.tar.gz:
+SHA1:
+  metadata.gz: 4ff7af5986004058e6bfc02cca1f00e9809f61bc
+  data.tar.gz: 243490b5b6468a9b204b9f7369eea55559fa08b0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1ace47fce8c23201c9a9cfabfc8f35700c81495eecdf34e255fb463af89f053709a892b9ec0b1603f468d2a09d33b64773bc1ec0909de745efd020a4136504eb
+  data.tar.gz: df1cbc7852e15de10bfd7d9fb88cb4c288926438a64beb7d7f88ebc077c7accf8c02e57778329249192f03b5b5da428eb2380074be4fcafc4d0cdcf640baed5b
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
+## 0.4.7 - 2017-05-02
+* [enhancement] Support location option to allow to use 'asia-northeast1' region
+
+## 0.4.6 - 2017-04-17
+* [enhancement] Support auth_method 'application_default'
+
 ## 0.4.5 - 2017-04-04
 
 * [maintenance] Fix deprecated warning log condition for `timeout_sec`
data/README.md
CHANGED
@@ -44,6 +44,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
 | json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
 | project | string | required if json_keyfile is not given | | project_id |
 | dataset | string | required | | dataset |
+| location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
 | auto_create_dataset | boolean | optional | false | automatically create dataset |
 | auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
@@ -123,6 +124,14 @@ out:
   source_format: NEWLINE_DELIMITED_JSON
 ```
 
+### location
+
+The geographic location of the dataset. Required except for US and EU.
+
+`auto_create_table` isn't supported except for US and EU. And GCS bucket should be in same region when you use `gcs_bucket`.
+
+See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
+
 ### mode
 
 5 modes are provided.
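For reference, the new option documented above maps onto a plugin config like the following. This is an illustrative sketch, not taken from the gem: the project, dataset, table, and keyfile values are placeholders, and only the keys relevant to `location` are shown, expressed as the Ruby hash the plugin's own test suite builds.

```ruby
# Illustrative embulk-output-bigquery config as a Ruby hash.
# All names and paths below are placeholders, not defaults.
config = {
  'type'         => 'bigquery',
  'mode'         => 'append',
  'auth_method'  => 'json_key',
  'json_keyfile' => '/path/to/keyfile.json',
  'project'      => 'your_project_name',
  'dataset'      => 'your_dataset_name',
  'table'        => 'your_table_name',
  # New in 0.4.7: set this for datasets outside US/EU (e.g. Tokyo).
  # Bigquery.configure downcases the value before use.
  'location'     => 'asia-northeast1',
}

# Per the note above, pairing a non-US/EU location with auto_create_gcs_bucket
# is rejected at configure time:
# config['auto_create_gcs_bucket'] = true  # => ConfigError
```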
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.4.6"
+  spec.version = "0.4.7"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -39,6 +39,7 @@ module Embulk
         'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
         'project' => config.param('project', :string, :default => nil),
         'dataset' => config.param('dataset', :string),
+        'location' => config.param('location', :string, :default => nil),
         'table' => config.param('table', :string),
         'dataset_old' => config.param('dataset_old', :string, :default => nil),
         'table_old' => config.param('table_old', :string, :default => nil),
@@ -112,6 +113,17 @@ module Embulk
         task['table_old'] ||= task['table']
       end
 
+      unless task['location'].nil?
+        task['location'] = task['location'].downcase
+        # google-api-client doesn't support create bucket with region
+        # We need to use Cloud Storage Client Libraries to support it
+        if task['auto_create_gcs_bucket']
+          unless %w[us eu].include?(task['location'])
+            raise ConfigError.new "`auto_create_gcs_bucket` isn't supported excepts in us/eu"
+          end
+        end
+      end
+
       if task['table_old']
         task['table_old'] = now.strftime(task['table_old'])
       end
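The `configure` hunk above adds a normalize-then-guard step for the new option. Restated as a standalone sketch (the method name is ours, not the plugin's; the plugin raises `ConfigError` inline inside `Bigquery.configure`):

```ruby
# Standalone sketch of the location check added in 0.4.7.
def check_location!(task)
  return task if task['location'].nil?
  task['location'] = task['location'].downcase
  # google-api-client cannot create a GCS bucket in an arbitrary region, so the
  # combination with auto_create_gcs_bucket is rejected outside us/eu.
  if task['auto_create_gcs_bucket'] && !%w[us eu].include?(task['location'])
    raise "`auto_create_gcs_bucket` isn't supported except in us/eu"
  end
  task
end

p check_location!('location' => 'ASIA-NORTHEAST1')
# => {"location"=>"asia-northeast1"}

begin
  check_location!('location' => 'asia-northeast1', 'auto_create_gcs_bucket' => true)
rescue => e
  puts e.message
end
```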
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -19,6 +19,8 @@ module Embulk
         reset_fields(fields) if fields
         @project = @task['project']
         @dataset = @task['dataset']
+        @location = @task['location']
+        @location_for_log = @location.nil? ? 'us/eu' : @location
 
         @task['source_format'] ||= 'CSV'
         @task['max_bad_records'] ||= 0
@@ -82,7 +84,7 @@ module Embulk
         else
           job_id = "embulk_load_job_#{SecureRandom.uuid}"
         end
-        Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table}" }
+        Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
 
         body = {
           job_reference: {
@@ -110,6 +112,10 @@ module Embulk
             }
           }
         }
+
+        if @location
+          body[:job_reference][:location] = @location
+        end
 
         if @task['schema_update_options']
           body[:configuration][:load][:schema_update_options] = @task['schema_update_options']
@@ -128,7 +134,7 @@ module Embulk
           Embulk.logger.error {
             "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
           }
-          raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table}, response:#{response}"
+          raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
         end
       end
     end
@@ -173,7 +179,7 @@ module Embulk
           else
             job_id = "embulk_load_job_#{SecureRandom.uuid}"
           end
-          Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table}" }
+          Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
         else
           Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
           return
@@ -205,6 +211,10 @@ module Embulk
           }
         }
 
+        if @location
+          body[:job_reference][:location] = @location
+        end
+
         if @task['schema_update_options']
           body[:configuration][:load][:schema_update_options] = @task['schema_update_options']
         end
@@ -230,7 +240,7 @@ module Embulk
           Embulk.logger.error {
             "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
           }
-          raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table}, response:#{response}"
+          raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
         end
       end
     end
@@ -269,6 +279,10 @@ module Embulk
           }
         }
 
+        if @location
+          body[:job_reference][:location] = @location
+        end
+
         opts = {}
         Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
         response = with_network_retry { client.insert_job(@project, body, opts) }
@@ -312,7 +326,7 @@ module Embulk
             "job_id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
           }
           sleep wait_interval
-          _response = with_network_retry { client.get_job(@project, job_id) }
+          _response = with_network_retry { client.get_job(@project, job_id, location: @location) }
         end
       end
 
@@ -341,7 +355,7 @@ module Embulk
       def create_dataset(dataset = nil, reference: nil)
         dataset ||= @dataset
         begin
-          Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset}" }
+          Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset} in #{@location_for_log}" }
           hint = {}
           if reference
             response = get_dataset(reference)
@@ -353,8 +367,11 @@ module Embulk
              dataset_id: dataset,
            },
          }.merge(hint)
+          if @location
+            body[:location] = @location
+          end
          opts = {}
-          Embulk.logger.debug { "embulk-output-bigquery: insert_dataset(#{@project}, #{dataset}, #{body}, #{opts})" }
+          Embulk.logger.debug { "embulk-output-bigquery: insert_dataset(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
          with_network_retry { client.insert_dataset(@project, body, opts) }
         rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
           if e.status_code == 409 && /Already Exists:/ =~ e.message
@@ -366,14 +383,14 @@ module Embulk
           Embulk.logger.error {
             "embulk-output-bigquery: insert_dataset(#{@project}, #{body}, #{opts}), response:#{response}"
           }
-          raise Error, "failed to create dataset #{@project}:#{dataset}, response:#{response}"
+          raise Error, "failed to create dataset #{@project}:#{dataset} in #{@location_for_log}, response:#{response}"
         end
       end
 
       def get_dataset(dataset = nil)
         dataset ||= @dataset
         begin
-          Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{
+          Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{dataset}" }
          with_network_retry { client.get_dataset(@project, dataset) }
         rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
           if e.status_code == 404
@@ -416,7 +433,7 @@ module Embulk
           end
 
           opts = {}
-          Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts})" }
+          Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
          with_network_retry { client.insert_table(@project, dataset, body, opts) }
         rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
           if e.status_code == 409 && /Already Exists:/ =~ e.message
@@ -426,9 +443,9 @@ module Embulk
 
           response = {status_code: e.status_code, message: e.message, error_class: e.class}
           Embulk.logger.error {
-            "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts}), response:#{response}"
+            "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
           }
-          raise Error, "failed to create table #{@project}:#{dataset}.#{table}, response:#{response}"
+          raise Error, "failed to create table #{@project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
         end
       end
 
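Taken together, the client changes above thread `@location` through the BigQuery jobs API: it is added to the `job_reference` of every inserted job, to the dataset body in `create_dataset`, and passed to `get_job` when polling. A minimal sketch of the resulting load-job body follows; the project/dataset/table values and the exact set of `load` keys are illustrative placeholders, only the `job_reference` location handling mirrors the diff.

```ruby
require 'securerandom'

# Sketch of a load-job body as built by 0.4.7. Field values are placeholders;
# the real body is assembled from the task config and schema.
location = 'asia-northeast1'   # @location (nil keeps the default us/eu behaviour)

body = {
  job_reference: {
    project_id: 'your_project_name',
    job_id: "embulk_load_job_#{SecureRandom.uuid}",
  },
  configuration: {
    load: {
      destination_table: {
        project_id: 'your_project_name',
        dataset_id: 'your_dataset_name',
        table_id: 'your_table_name',
      },
      source_format: 'CSV',
    }
  }
}

# New in 0.4.7: only set when a location was configured.
body[:job_reference][:location] = location if location

# Polling must also name the location (as the diff does), e.g.:
#   client.get_job(project, job_id, location: location)
```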
data/test/test_configure.rb
CHANGED
@@ -49,6 +49,7 @@ module Embulk
       assert_equal nil, task['json_keyfile']
       assert_equal "your_project_name", task['project']
       assert_equal "your_dataset_name", task['dataset']
+      assert_equal nil, task['location']
       assert_equal "your_table_name", task['table']
       assert_equal nil, task['dataset_old']
       assert_equal nil, task['table_old']
@@ -103,6 +104,20 @@ module Embulk
       assert_raise { Bigquery.configure(config, schema, processor_count) }
     end
 
+    def test_location
+      config = least_config.merge('location' => 'us')
+      assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+
+      config = least_config.merge('location' => 'eu')
+      assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+
+      config = least_config.merge('location' => 'asia-northeast1')
+      assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+
+      config = least_config.merge('location' => 'asia-northeast1', 'auto_create_gcs_bucket' => true)
+      assert_raise { Bigquery.configure(config, schema, processor_count) }
+    end
+
     def test_dataset_table_old
       task = nil
       config = least_config.merge('mode' => 'replace_backup', 'table_old' => 'backup')
data/test/test_helper.rb
CHANGED
@@ -94,6 +94,7 @@ module Embulk
     def test_create_load_job_id
       task = {
         'dataset' => 'your_dataset_name',
+        'location' => 'asia-northeast1',
         'table' => 'your_table_name',
         'source_format' => 'CSV',
         'max_bad_records' => nil,
@@ -108,6 +109,7 @@ module Embulk
       File.write("tmp/your_file_name", "foobarbaz")
       job_id = Helper.create_load_job_id(task, 'tmp/your_file_name', fields)
       assert job_id.is_a?(String)
+      assert_equal 'embulk_load_job_2abaf528b69987db0224e52bbd1f0eec', job_id
     end
   end
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.4.6
+  version: 0.4.7
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-
+date: 2018-05-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -175,7 +175,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.
+rubygems_version: 2.6.13
 signing_key:
 specification_version: 4
 summary: Google BigQuery output plugin for Embulk