embulk-output-bigquery 0.4.6 → 0.4.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 860095ce62db6591bce595ae0dc7687ef821195c4462f9542ac711e1026fe986
4
- data.tar.gz: a44f3f2925cb502d2c9a03ca0b8af7931582b6dd0112d4f50bc40a316e6877b9
2
+ SHA1:
3
+ metadata.gz: 4ff7af5986004058e6bfc02cca1f00e9809f61bc
4
+ data.tar.gz: 243490b5b6468a9b204b9f7369eea55559fa08b0
5
5
  SHA512:
6
- metadata.gz: 4e89c528a2151b6a348f5b1d9a8a3f7cf2b2fe7d61a062a9d95c195255e9d3598ac529a401010b260eb5f3821ef4c1b432ac947cbda2445368dcac5a6683726d
7
- data.tar.gz: 84f8250e8110add2c98fb2d164b11ad6307293a4172984a62e675579b8e83a5ac214e32a1a7b01c5a1fbf2f167389b10cb728eac4ecc9adb0490fdd6e453418b
6
+ metadata.gz: 1ace47fce8c23201c9a9cfabfc8f35700c81495eecdf34e255fb463af89f053709a892b9ec0b1603f468d2a09d33b64773bc1ec0909de745efd020a4136504eb
7
+ data.tar.gz: df1cbc7852e15de10bfd7d9fb88cb4c288926438a64beb7d7f88ebc077c7accf8c02e57778329249192f03b5b5da428eb2380074be4fcafc4d0cdcf640baed5b
@@ -1,3 +1,9 @@
1
+ ## 0.4.7 - 2017-05-02
2
+ * [enhancement] Support location option to allow use of the 'asia-northeast1' region
3
+
4
+ ## 0.4.6 - 2017-04-17
5
+ * [enhancement] Support auth_method 'application_default'
6
+
1
7
  ## 0.4.5 - 2017-04-04
2
8
 
3
9
  * [maintenance] Fix deprecated warning log condition for `timeout_sec`
data/README.md CHANGED
@@ -44,6 +44,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
44
44
  | json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
45
45
  | project | string | required if json_keyfile is not given | | project_id |
46
46
  | dataset | string | required | | dataset |
47
+ | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
47
48
  | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
48
49
  | auto_create_dataset | boolean | optional | false | automatically create dataset |
49
50
  | auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
@@ -123,6 +124,14 @@ out:
123
124
  source_format: NEWLINE_DELIMITED_JSON
124
125
  ```
125
126
 
127
+ ### location
128
+
129
+ The geographic location of the dataset. Required except for US and EU.
130
+
131
+ `auto_create_table` isn't supported except for US and EU. The GCS bucket should also be in the same region when you use `gcs_bucket`.
132
+
133
+ See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
134
+
126
135
  ### mode
127
136
 
128
137
  5 modes are provided.
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-bigquery"
3
- spec.version = "0.4.6"
3
+ spec.version = "0.4.7"
4
4
  spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
5
5
  spec.summary = "Google BigQuery output plugin for Embulk"
6
6
  spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -39,6 +39,7 @@ module Embulk
39
39
  'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
40
40
  'project' => config.param('project', :string, :default => nil),
41
41
  'dataset' => config.param('dataset', :string),
42
+ 'location' => config.param('location', :string, :default => nil),
42
43
  'table' => config.param('table', :string),
43
44
  'dataset_old' => config.param('dataset_old', :string, :default => nil),
44
45
  'table_old' => config.param('table_old', :string, :default => nil),
@@ -112,6 +113,17 @@ module Embulk
112
113
  task['table_old'] ||= task['table']
113
114
  end
114
115
 
116
+ unless task['location'].nil?
117
+ task['location'] = task['location'].downcase
118
+ # google-api-client doesn't support create bucket with region
119
+ # We need to use Cloud Storage Client Libraries to support it
120
+ if task['auto_create_gcs_bucket']
121
+ unless %w[us eu].include?(task['location'])
122
+ raise ConfigError.new "`auto_create_gcs_bucket` isn't supported except in us/eu"
123
+ end
124
+ end
125
+ end
126
+
115
127
  if task['table_old']
116
128
  task['table_old'] = now.strftime(task['table_old'])
117
129
  end
@@ -19,6 +19,8 @@ module Embulk
19
19
  reset_fields(fields) if fields
20
20
  @project = @task['project']
21
21
  @dataset = @task['dataset']
22
+ @location = @task['location']
23
+ @location_for_log = @location.nil? ? 'us/eu' : @location
22
24
 
23
25
  @task['source_format'] ||= 'CSV'
24
26
  @task['max_bad_records'] ||= 0
@@ -82,7 +84,7 @@ module Embulk
82
84
  else
83
85
  job_id = "embulk_load_job_#{SecureRandom.uuid}"
84
86
  end
85
- Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table}" }
87
+ Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
86
88
 
87
89
  body = {
88
90
  job_reference: {
@@ -110,6 +112,10 @@ module Embulk
110
112
  }
111
113
  }
112
114
  }
115
+
116
+ if @location
117
+ body[:job_reference][:location] = @location
118
+ end
113
119
 
114
120
  if @task['schema_update_options']
115
121
  body[:configuration][:load][:schema_update_options] = @task['schema_update_options']
@@ -128,7 +134,7 @@ module Embulk
128
134
  Embulk.logger.error {
129
135
  "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
130
136
  }
131
- raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table}, response:#{response}"
137
+ raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
132
138
  end
133
139
  end
134
140
  end
@@ -173,7 +179,7 @@ module Embulk
173
179
  else
174
180
  job_id = "embulk_load_job_#{SecureRandom.uuid}"
175
181
  end
176
- Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table}" }
182
+ Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
177
183
  else
178
184
  Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
179
185
  return
@@ -205,6 +211,10 @@ module Embulk
205
211
  }
206
212
  }
207
213
 
214
+ if @location
215
+ body[:job_reference][:location] = @location
216
+ end
217
+
208
218
  if @task['schema_update_options']
209
219
  body[:configuration][:load][:schema_update_options] = @task['schema_update_options']
210
220
  end
@@ -230,7 +240,7 @@ module Embulk
230
240
  Embulk.logger.error {
231
241
  "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
232
242
  }
233
- raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table}, response:#{response}"
243
+ raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
234
244
  end
235
245
  end
236
246
  end
@@ -269,6 +279,10 @@ module Embulk
269
279
  }
270
280
  }
271
281
 
282
+ if @location
283
+ body[:job_reference][:location] = @location
284
+ end
285
+
272
286
  opts = {}
273
287
  Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
274
288
  response = with_network_retry { client.insert_job(@project, body, opts) }
@@ -312,7 +326,7 @@ module Embulk
312
326
  "job_id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
313
327
  }
314
328
  sleep wait_interval
315
- _response = with_network_retry { client.get_job(@project, job_id) }
329
+ _response = with_network_retry { client.get_job(@project, job_id, location: @location) }
316
330
  end
317
331
  end
318
332
 
@@ -341,7 +355,7 @@ module Embulk
341
355
  def create_dataset(dataset = nil, reference: nil)
342
356
  dataset ||= @dataset
343
357
  begin
344
- Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset}" }
358
+ Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset} in #{@location_for_log}" }
345
359
  hint = {}
346
360
  if reference
347
361
  response = get_dataset(reference)
@@ -353,8 +367,11 @@ module Embulk
353
367
  dataset_id: dataset,
354
368
  },
355
369
  }.merge(hint)
370
+ if @location
371
+ body[:location] = @location
372
+ end
356
373
  opts = {}
357
- Embulk.logger.debug { "embulk-output-bigquery: insert_dataset(#{@project}, #{dataset}, #{body}, #{opts})" }
374
+ Embulk.logger.debug { "embulk-output-bigquery: insert_dataset(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
358
375
  with_network_retry { client.insert_dataset(@project, body, opts) }
359
376
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
360
377
  if e.status_code == 409 && /Already Exists:/ =~ e.message
@@ -366,14 +383,14 @@ module Embulk
366
383
  Embulk.logger.error {
367
384
  "embulk-output-bigquery: insert_dataset(#{@project}, #{body}, #{opts}), response:#{response}"
368
385
  }
369
- raise Error, "failed to create dataset #{@project}:#{dataset}, response:#{response}"
386
+ raise Error, "failed to create dataset #{@project}:#{dataset} in #{@location_for_log}, response:#{response}"
370
387
  end
371
388
  end
372
389
 
373
390
  def get_dataset(dataset = nil)
374
391
  dataset ||= @dataset
375
392
  begin
376
- Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{@dataset}" }
393
+ Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{dataset}" }
377
394
  with_network_retry { client.get_dataset(@project, dataset) }
378
395
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
379
396
  if e.status_code == 404
@@ -416,7 +433,7 @@ module Embulk
416
433
  end
417
434
 
418
435
  opts = {}
419
- Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts})" }
436
+ Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
420
437
  with_network_retry { client.insert_table(@project, dataset, body, opts) }
421
438
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
422
439
  if e.status_code == 409 && /Already Exists:/ =~ e.message
@@ -426,9 +443,9 @@ module Embulk
426
443
 
427
444
  response = {status_code: e.status_code, message: e.message, error_class: e.class}
428
445
  Embulk.logger.error {
429
- "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts}), response:#{response}"
446
+ "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
430
447
  }
431
- raise Error, "failed to create table #{@project}:#{dataset}.#{table}, response:#{response}"
448
+ raise Error, "failed to create table #{@project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
432
449
  end
433
450
  end
434
451
 
@@ -67,6 +67,7 @@ module Embulk
67
67
  elements = [
68
68
  Digest::MD5.file(path).hexdigest,
69
69
  task['dataset'],
70
+ task['location'],
70
71
  task['table'],
71
72
  fields,
72
73
  task['source_format'],
@@ -49,6 +49,7 @@ module Embulk
49
49
  assert_equal nil, task['json_keyfile']
50
50
  assert_equal "your_project_name", task['project']
51
51
  assert_equal "your_dataset_name", task['dataset']
52
+ assert_equal nil, task['location']
52
53
  assert_equal "your_table_name", task['table']
53
54
  assert_equal nil, task['dataset_old']
54
55
  assert_equal nil, task['table_old']
@@ -103,6 +104,20 @@ module Embulk
103
104
  assert_raise { Bigquery.configure(config, schema, processor_count) }
104
105
  end
105
106
 
107
+ def test_location
108
+ config = least_config.merge('location' => 'us')
109
+ assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
110
+
111
+ config = least_config.merge('location' => 'eu')
112
+ assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
113
+
114
+ config = least_config.merge('location' => 'asia-northeast1')
115
+ assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
116
+
117
+ config = least_config.merge('location' => 'asia-northeast1', 'auto_create_gcs_bucket' => true)
118
+ assert_raise { Bigquery.configure(config, schema, processor_count) }
119
+ end
120
+
106
121
  def test_dataset_table_old
107
122
  task = nil
108
123
  config = least_config.merge('mode' => 'replace_backup', 'table_old' => 'backup')
@@ -94,6 +94,7 @@ module Embulk
94
94
  def test_create_load_job_id
95
95
  task = {
96
96
  'dataset' => 'your_dataset_name',
97
+ 'location' => 'asia-northeast1',
97
98
  'table' => 'your_table_name',
98
99
  'source_format' => 'CSV',
99
100
  'max_bad_records' => nil,
@@ -108,6 +109,7 @@ module Embulk
108
109
  File.write("tmp/your_file_name", "foobarbaz")
109
110
  job_id = Helper.create_load_job_id(task, 'tmp/your_file_name', fields)
110
111
  assert job_id.is_a?(String)
112
+ assert_equal 'embulk_load_job_2abaf528b69987db0224e52bbd1f0eec', job_id
111
113
  end
112
114
  end
113
115
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.6
4
+ version: 0.4.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-04-17 00:00:00.000000000 Z
12
+ date: 2018-05-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
@@ -175,7 +175,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
175
  version: '0'
176
176
  requirements: []
177
177
  rubyforge_project:
178
- rubygems_version: 2.6.11
178
+ rubygems_version: 2.6.13
179
179
  signing_key:
180
180
  specification_version: 4
181
181
  summary: Google BigQuery output plugin for Embulk