embulk-output-bigquery 0.4.6 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 860095ce62db6591bce595ae0dc7687ef821195c4462f9542ac711e1026fe986
4
- data.tar.gz: a44f3f2925cb502d2c9a03ca0b8af7931582b6dd0112d4f50bc40a316e6877b9
2
+ SHA1:
3
+ metadata.gz: 4ff7af5986004058e6bfc02cca1f00e9809f61bc
4
+ data.tar.gz: 243490b5b6468a9b204b9f7369eea55559fa08b0
5
5
  SHA512:
6
- metadata.gz: 4e89c528a2151b6a348f5b1d9a8a3f7cf2b2fe7d61a062a9d95c195255e9d3598ac529a401010b260eb5f3821ef4c1b432ac947cbda2445368dcac5a6683726d
7
- data.tar.gz: 84f8250e8110add2c98fb2d164b11ad6307293a4172984a62e675579b8e83a5ac214e32a1a7b01c5a1fbf2f167389b10cb728eac4ecc9adb0490fdd6e453418b
6
+ metadata.gz: 1ace47fce8c23201c9a9cfabfc8f35700c81495eecdf34e255fb463af89f053709a892b9ec0b1603f468d2a09d33b64773bc1ec0909de745efd020a4136504eb
7
+ data.tar.gz: df1cbc7852e15de10bfd7d9fb88cb4c288926438a64beb7d7f88ebc077c7accf8c02e57778329249192f03b5b5da428eb2380074be4fcafc4d0cdcf640baed5b
@@ -1,3 +1,9 @@
1
+ ## 0.4.7 - 2017-05-02
2
+ * [enhancement] Support the `location` option to allow using the 'asia-northeast1' region
3
+
4
+ ## 0.4.6 - 2017-04-17
5
+ * [enhancement] Support auth_method 'application_default'
6
+
1
7
  ## 0.4.5 - 2017-04-04
2
8
 
3
9
  * [maintenance] Fix deprecated warning log condition for `timeout_sec`
data/README.md CHANGED
@@ -44,6 +44,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
44
44
  | json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
45
45
  | project | string | required if json_keyfile is not given | | project_id |
46
46
  | dataset | string | required | | dataset |
47
+ | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
47
48
  | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
48
49
  | auto_create_dataset | boolean | optional | false | automatically create dataset |
49
50
  | auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
@@ -123,6 +124,14 @@ out:
123
124
  source_format: NEWLINE_DELIMITED_JSON
124
125
  ```
125
126
 
127
+ ### location
128
+
129
+ The geographic location of the dataset. Required except for US and EU.
130
+
131
+ `auto_create_table` isn't supported except for US and EU. The GCS bucket should also be in the same region when you use `gcs_bucket`.
132
+
133
+ See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
134
+
126
135
  ### mode
127
136
 
128
137
  5 modes are provided.
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-bigquery"
3
- spec.version = "0.4.6"
3
+ spec.version = "0.4.7"
4
4
  spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
5
5
  spec.summary = "Google BigQuery output plugin for Embulk"
6
6
  spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -39,6 +39,7 @@ module Embulk
39
39
  'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
40
40
  'project' => config.param('project', :string, :default => nil),
41
41
  'dataset' => config.param('dataset', :string),
42
+ 'location' => config.param('location', :string, :default => nil),
42
43
  'table' => config.param('table', :string),
43
44
  'dataset_old' => config.param('dataset_old', :string, :default => nil),
44
45
  'table_old' => config.param('table_old', :string, :default => nil),
@@ -112,6 +113,17 @@ module Embulk
112
113
  task['table_old'] ||= task['table']
113
114
  end
114
115
 
116
+ unless task['location'].nil?
117
+ task['location'] = task['location'].downcase
118
+ # google-api-client doesn't support create bucket with region
119
+ # We need to use Cloud Storage Client Libraries to support it
120
+ if task['auto_create_gcs_bucket']
121
+ unless %w[us eu].include?(task['location'])
122
+ raise ConfigError.new "`auto_create_gcs_bucket` isn't supported excepts in us/eu"
123
+ end
124
+ end
125
+ end
126
+
115
127
  if task['table_old']
116
128
  task['table_old'] = now.strftime(task['table_old'])
117
129
  end
@@ -19,6 +19,8 @@ module Embulk
19
19
  reset_fields(fields) if fields
20
20
  @project = @task['project']
21
21
  @dataset = @task['dataset']
22
+ @location = @task['location']
23
+ @location_for_log = @location.nil? ? 'us/eu' : @location
22
24
 
23
25
  @task['source_format'] ||= 'CSV'
24
26
  @task['max_bad_records'] ||= 0
@@ -82,7 +84,7 @@ module Embulk
82
84
  else
83
85
  job_id = "embulk_load_job_#{SecureRandom.uuid}"
84
86
  end
85
- Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table}" }
87
+ Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
86
88
 
87
89
  body = {
88
90
  job_reference: {
@@ -110,6 +112,10 @@ module Embulk
110
112
  }
111
113
  }
112
114
  }
115
+
116
+ if @location
117
+ body[:job_reference][:location] = @location
118
+ end
113
119
 
114
120
  if @task['schema_update_options']
115
121
  body[:configuration][:load][:schema_update_options] = @task['schema_update_options']
@@ -128,7 +134,7 @@ module Embulk
128
134
  Embulk.logger.error {
129
135
  "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
130
136
  }
131
- raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table}, response:#{response}"
137
+ raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
132
138
  end
133
139
  end
134
140
  end
@@ -173,7 +179,7 @@ module Embulk
173
179
  else
174
180
  job_id = "embulk_load_job_#{SecureRandom.uuid}"
175
181
  end
176
- Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table}" }
182
+ Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
177
183
  else
178
184
  Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
179
185
  return
@@ -205,6 +211,10 @@ module Embulk
205
211
  }
206
212
  }
207
213
 
214
+ if @location
215
+ body[:job_reference][:location] = @location
216
+ end
217
+
208
218
  if @task['schema_update_options']
209
219
  body[:configuration][:load][:schema_update_options] = @task['schema_update_options']
210
220
  end
@@ -230,7 +240,7 @@ module Embulk
230
240
  Embulk.logger.error {
231
241
  "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
232
242
  }
233
- raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table}, response:#{response}"
243
+ raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
234
244
  end
235
245
  end
236
246
  end
@@ -269,6 +279,10 @@ module Embulk
269
279
  }
270
280
  }
271
281
 
282
+ if @location
283
+ body[:job_reference][:location] = @location
284
+ end
285
+
272
286
  opts = {}
273
287
  Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
274
288
  response = with_network_retry { client.insert_job(@project, body, opts) }
@@ -312,7 +326,7 @@ module Embulk
312
326
  "job_id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
313
327
  }
314
328
  sleep wait_interval
315
- _response = with_network_retry { client.get_job(@project, job_id) }
329
+ _response = with_network_retry { client.get_job(@project, job_id, location: @location) }
316
330
  end
317
331
  end
318
332
 
@@ -341,7 +355,7 @@ module Embulk
341
355
  def create_dataset(dataset = nil, reference: nil)
342
356
  dataset ||= @dataset
343
357
  begin
344
- Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset}" }
358
+ Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset} in #{@location_for_log}" }
345
359
  hint = {}
346
360
  if reference
347
361
  response = get_dataset(reference)
@@ -353,8 +367,11 @@ module Embulk
353
367
  dataset_id: dataset,
354
368
  },
355
369
  }.merge(hint)
370
+ if @location
371
+ body[:location] = @location
372
+ end
356
373
  opts = {}
357
- Embulk.logger.debug { "embulk-output-bigquery: insert_dataset(#{@project}, #{dataset}, #{body}, #{opts})" }
374
+ Embulk.logger.debug { "embulk-output-bigquery: insert_dataset(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
358
375
  with_network_retry { client.insert_dataset(@project, body, opts) }
359
376
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
360
377
  if e.status_code == 409 && /Already Exists:/ =~ e.message
@@ -366,14 +383,14 @@ module Embulk
366
383
  Embulk.logger.error {
367
384
  "embulk-output-bigquery: insert_dataset(#{@project}, #{body}, #{opts}), response:#{response}"
368
385
  }
369
- raise Error, "failed to create dataset #{@project}:#{dataset}, response:#{response}"
386
+ raise Error, "failed to create dataset #{@project}:#{dataset} in #{@location_for_log}, response:#{response}"
370
387
  end
371
388
  end
372
389
 
373
390
  def get_dataset(dataset = nil)
374
391
  dataset ||= @dataset
375
392
  begin
376
- Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{@dataset}" }
393
+ Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{dataset}" }
377
394
  with_network_retry { client.get_dataset(@project, dataset) }
378
395
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
379
396
  if e.status_code == 404
@@ -416,7 +433,7 @@ module Embulk
416
433
  end
417
434
 
418
435
  opts = {}
419
- Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts})" }
436
+ Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
420
437
  with_network_retry { client.insert_table(@project, dataset, body, opts) }
421
438
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
422
439
  if e.status_code == 409 && /Already Exists:/ =~ e.message
@@ -426,9 +443,9 @@ module Embulk
426
443
 
427
444
  response = {status_code: e.status_code, message: e.message, error_class: e.class}
428
445
  Embulk.logger.error {
429
- "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts}), response:#{response}"
446
+ "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
430
447
  }
431
- raise Error, "failed to create table #{@project}:#{dataset}.#{table}, response:#{response}"
448
+ raise Error, "failed to create table #{@project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
432
449
  end
433
450
  end
434
451
 
@@ -67,6 +67,7 @@ module Embulk
67
67
  elements = [
68
68
  Digest::MD5.file(path).hexdigest,
69
69
  task['dataset'],
70
+ task['location'],
70
71
  task['table'],
71
72
  fields,
72
73
  task['source_format'],
@@ -49,6 +49,7 @@ module Embulk
49
49
  assert_equal nil, task['json_keyfile']
50
50
  assert_equal "your_project_name", task['project']
51
51
  assert_equal "your_dataset_name", task['dataset']
52
+ assert_equal nil, task['location']
52
53
  assert_equal "your_table_name", task['table']
53
54
  assert_equal nil, task['dataset_old']
54
55
  assert_equal nil, task['table_old']
@@ -103,6 +104,20 @@ module Embulk
103
104
  assert_raise { Bigquery.configure(config, schema, processor_count) }
104
105
  end
105
106
 
107
+ def test_location
108
+ config = least_config.merge('location' => 'us')
109
+ assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
110
+
111
+ config = least_config.merge('location' => 'eu')
112
+ assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
113
+
114
+ config = least_config.merge('location' => 'asia-northeast1')
115
+ assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
116
+
117
+ config = least_config.merge('location' => 'asia-northeast1', 'auto_create_gcs_bucket' => true)
118
+ assert_raise { Bigquery.configure(config, schema, processor_count) }
119
+ end
120
+
106
121
  def test_dataset_table_old
107
122
  task = nil
108
123
  config = least_config.merge('mode' => 'replace_backup', 'table_old' => 'backup')
@@ -94,6 +94,7 @@ module Embulk
94
94
  def test_create_load_job_id
95
95
  task = {
96
96
  'dataset' => 'your_dataset_name',
97
+ 'location' => 'asia-northeast1',
97
98
  'table' => 'your_table_name',
98
99
  'source_format' => 'CSV',
99
100
  'max_bad_records' => nil,
@@ -108,6 +109,7 @@ module Embulk
108
109
  File.write("tmp/your_file_name", "foobarbaz")
109
110
  job_id = Helper.create_load_job_id(task, 'tmp/your_file_name', fields)
110
111
  assert job_id.is_a?(String)
112
+ assert_equal 'embulk_load_job_2abaf528b69987db0224e52bbd1f0eec', job_id
111
113
  end
112
114
  end
113
115
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.6
4
+ version: 0.4.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-04-17 00:00:00.000000000 Z
12
+ date: 2018-05-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
@@ -175,7 +175,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
175
  version: '0'
176
176
  requirements: []
177
177
  rubyforge_project:
178
- rubygems_version: 2.6.11
178
+ rubygems_version: 2.6.13
179
179
  signing_key:
180
180
  specification_version: 4
181
181
  summary: Google BigQuery output plugin for Embulk