embulk-output-bigquery 0.6.5 → 0.6.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1cc922d557454c9b4e50d1bce1ad4625161c6894cbae47a392ed174ad4d2123f
-  data.tar.gz: 285a811634c1a2ddad6e19389ba385910743be4ee598fa03501c6307db40ce4f
+  metadata.gz: d48b65d07302466f8f52dadb559ad049a054453db3741d4384209125e7b9e9cd
+  data.tar.gz: 13cd70568cfaebba819a9b7a9a51d1c45ff9f1599893b4a0b451e82dc84e40c9
 SHA512:
-  metadata.gz: f510a2cc1ffec9b5ee2965a569b01920b4955a8a9fa784e173e67b47a7cef081dc07f4859d45e9914fe4afcec8f2aaacd4389db891546b818037560e0d9e0907
-  data.tar.gz: 644008c32c635ee128cedde95918bf74d08291d026f2d9edd83d82d406772652b69b4312342f83435ef7dd4af6fa33d434647d4d1d8cea5e086317a6f3815e00
+  metadata.gz: ee51c9bf570ce2f2a55e43a5ab7842f1669d814b93cdd1395853be8f6b3ff770f6a5a6c9f9a81b6c0eca1e9bc72aff3d1302d37760fe559ecfa33a740e1da724
+  data.tar.gz: 8ada113513a089d786bf93bce1de98ad4bcc900ff73931c164a801b29e0bad9fd4b001bdf85962570998df995209e4ff320ba74e1fae22b61fb389a621121073
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+## 0.6.6 - 2021-06-10
+
+* [maintenance] Fix network retry function (thanks to @case-k-git)
+* [enhancement] Allow to specify the billing project and the project to which the data will be loaded separately (thanks to @ck-fm0211)
+* [enhancement] Include original error message on json parse error (thanks to @k-yomo)
+
 ## 0.6.5 - 2021-06-10
 * [maintenance] Fix failed tests (thanks to @kyoshidajp)
 * [maintenance] Lock representable version for avoiding requiring Ruby 2.4 (thanks to @hiroyuki-sato)
data/README.md CHANGED
@@ -33,6 +33,7 @@ OAuth flow for installed applications.
 | auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
 | json_keyfile | string | optional | | keyfile path or `content` |
 | project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
+| destination_project | string | optional | `project` value | A destination project to which the data will be loaded. Use this if you want to separate a billing project (the `project` value) and a destination project (the `destination_project` value). |
 | dataset | string | required | | dataset |
 | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
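The new option lets the project billed for load jobs differ from the project that owns the destination dataset. A minimal config sketch in the README's own YAML style (the project IDs below are placeholders; `destination_project` falls back to `project` when omitted, so existing configs behave as before):

```yaml
out:
  type: bigquery
  auth_method: json_key
  json_keyfile: /path/to/json_keyfile.json
  project: my-billing-project           # jobs run and are billed here
  destination_project: my-data-project  # datasets and tables live here
  dataset: my_dataset
  table: my_table
```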
data/embulk-output-bigquery.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.6.5"
+  spec.version = "0.6.6"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -16,9 +16,9 @@ Gem::Specification.new do |spec|
 
   # TODO
   # signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
-  # Embulk 0.9 use JRuby 9.1.X.Y and It compatible Ruby 2.3.
-  # So, Force install signet < 0.12 and google-api-client < 0.33.0
-  # Also, representable veresion > 3.1.0 requires Ruby version >= 2.4
+  # Embulk 0.9 use JRuby 9.1.X.Y and it's compatible with Ruby 2.3.
+  # So, force install signet < 0.12 and google-api-client < 0.33.0
+  # Also, representable version >= 3.1.0 requires Ruby version >= 2.4
   spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
   spec.add_dependency 'google-api-client','< 0.33.0'
   spec.add_dependency 'time_with_zone'
data/lib/embulk/output/bigquery.rb CHANGED
@@ -36,6 +36,7 @@ module Embulk
         'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
         'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
         'project' => config.param('project', :string, :default => nil),
+        'destination_project' => config.param('destination_project', :string, :default => nil),
         'dataset' => config.param('dataset', :string),
         'location' => config.param('location', :string, :default => nil),
         'table' => config.param('table', :string),
@@ -135,12 +136,13 @@ module Embulk
           json_key = JSON.parse(task['json_keyfile'])
           task['project'] ||= json_key['project_id']
         rescue => e
-          raise ConfigError.new "json_keyfile is not a JSON file"
+          raise ConfigError.new "Parsing 'json_keyfile' failed with error: #{e.class} #{e.message}"
         end
       end
       if task['project'].nil?
         raise ConfigError.new "Required field \"project\" is not set"
       end
+      task['destination_project'] ||= task['project']
 
       if (task['payload_column'] or task['payload_column_index']) and task['auto_create_table']
         if task['schema_file'].nil? and task['template_table'].nil?
@@ -166,7 +168,7 @@ module Embulk
         begin
           JSON.parse(File.read(task['schema_file']))
         rescue => e
-          raise ConfigError.new "schema_file #{task['schema_file']} is not a JSON file"
+          raise ConfigError.new "Parsing 'schema_file' #{task['schema_file']} failed with error: #{e.class} #{e.message}"
         end
       end
 
data/lib/embulk/output/bigquery/bigquery_client.rb CHANGED
@@ -18,6 +18,7 @@ module Embulk
       @schema = schema
       reset_fields(fields) if fields
       @project = @task['project']
+      @destination_project = @task['destination_project']
       @dataset = @task['dataset']
       @location = @task['location']
       @location_for_log = @location.nil? ? 'us/eu' : @location
@@ -80,7 +81,7 @@ module Embulk
         # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
         # we should generate job_id in client code, otherwise, retrying would cause duplication
         job_id = "embulk_load_job_#{SecureRandom.uuid}"
-        Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
+        Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }
 
         body = {
           job_reference: {
@@ -90,7 +91,7 @@ module Embulk
           configuration: {
             load: {
               destination_table: {
-                project_id: @project,
+                project_id: @destination_project,
                 dataset_id: @dataset,
                 table_id: table,
               },
@@ -130,7 +131,7 @@ module Embulk
           Embulk.logger.error {
             "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
           }
-          raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
+          raise Error, "failed to load #{object_uris} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
         end
       end
     end
@@ -171,7 +172,7 @@ module Embulk
           # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
           # we should generate job_id in client code, otherwise, retrying would cause duplication
           job_id = "embulk_load_job_#{SecureRandom.uuid}"
-          Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
+          Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }
         else
           Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
           return
@@ -185,7 +186,7 @@ module Embulk
           configuration: {
             load: {
               destination_table: {
-                project_id: @project,
+                project_id: @destination_project,
                 dataset_id: @dataset,
                 table_id: table,
               },
@@ -232,7 +233,7 @@ module Embulk
           Embulk.logger.error {
             "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
           }
-          raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
+          raise Error, "failed to load #{path} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
         end
       end
     end
@@ -245,7 +246,7 @@ module Embulk
 
         Embulk.logger.info {
           "embulk-output-bigquery: Copy job starting... job_id:[#{job_id}] " \
-          "#{@project}:#{@dataset}.#{source_table} => #{@project}:#{destination_dataset}.#{destination_table}"
+          "#{@destination_project}:#{@dataset}.#{source_table} => #{@destination_project}:#{destination_dataset}.#{destination_table}"
         }
 
         body = {
@@ -258,12 +259,12 @@ module Embulk
             create_deposition: 'CREATE_IF_NEEDED',
             write_disposition: write_disposition,
             source_table: {
-              project_id: @project,
+              project_id: @destination_project,
               dataset_id: @dataset,
               table_id: source_table,
             },
             destination_table: {
-              project_id: @project,
+              project_id: @destination_project,
               dataset_id: destination_dataset,
               table_id: destination_table,
             },
@@ -284,8 +285,8 @@ module Embulk
           Embulk.logger.error {
             "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
           }
-          raise Error, "failed to copy #{@project}:#{@dataset}.#{source_table} " \
-                       "to #{@project}:#{destination_dataset}.#{destination_table}, response:#{response}"
+          raise Error, "failed to copy #{@destination_project}:#{@dataset}.#{source_table} " \
+                       "to #{@destination_project}:#{destination_dataset}.#{destination_table}, response:#{response}"
         end
       end
     end
@@ -354,7 +355,7 @@ module Embulk
     def create_dataset(dataset = nil, reference: nil)
       dataset ||= @dataset
       begin
-        Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset} in #{@location_for_log}" }
+        Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@destination_project}:#{dataset} in #{@location_for_log}" }
         hint = {}
         if reference
           response = get_dataset(reference)
@@ -382,25 +383,25 @@ module Embulk
         Embulk.logger.error {
           "embulk-output-bigquery: insert_dataset(#{@project}, #{body}, #{opts}), response:#{response}"
         }
-        raise Error, "failed to create dataset #{@project}:#{dataset} in #{@location_for_log}, response:#{response}"
+        raise Error, "failed to create dataset #{@destination_project}:#{dataset} in #{@location_for_log}, response:#{response}"
       end
     end
 
     def get_dataset(dataset = nil)
       dataset ||= @dataset
       begin
-        Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{dataset}" }
-        with_network_retry { client.get_dataset(@project, dataset) }
+        Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@destination_project}:#{dataset}" }
+        with_network_retry { client.get_dataset(@destination_project, dataset) }
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
         if e.status_code == 404
-          raise NotFoundError, "Dataset #{@project}:#{dataset} is not found"
+          raise NotFoundError, "Dataset #{@destination_project}:#{dataset} is not found"
         end
 
         response = {status_code: e.status_code, message: e.message, error_class: e.class}
         Embulk.logger.error {
-          "embulk-output-bigquery: get_dataset(#{@project}, #{dataset}), response:#{response}"
+          "embulk-output-bigquery: get_dataset(#{@destination_project}, #{dataset}), response:#{response}"
         }
-        raise Error, "failed to get dataset #{@project}:#{dataset}, response:#{response}"
+        raise Error, "failed to get dataset #{@destination_project}:#{dataset}, response:#{response}"
       end
     end
 
@@ -414,7 +415,7 @@ module Embulk
         table = Helper.chomp_partition_decorator(table)
       end
 
-      Embulk.logger.info { "embulk-output-bigquery: Create table... #{@project}:#{dataset}.#{table}" }
+      Embulk.logger.info { "embulk-output-bigquery: Create table... #{@destination_project}:#{dataset}.#{table}" }
       body = {
         table_reference: {
           table_id: table,
@@ -452,7 +453,7 @@ module Embulk
        Embulk.logger.error {
          "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
        }
-        raise Error, "failed to create table #{@project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
+        raise Error, "failed to create table #{@destination_project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
       end
     end
 
@@ -469,8 +470,8 @@ module Embulk
     def delete_table_or_partition(table, dataset: nil)
       begin
         dataset ||= @dataset
-        Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@project}:#{dataset}.#{table}" }
-        with_network_retry { client.delete_table(@project, dataset, table) }
+        Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@destination_project}:#{dataset}.#{table}" }
+        with_network_retry { client.delete_table(@destination_project, dataset, table) }
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
         if e.status_code == 404 && /Not found:/ =~ e.message
           # ignore 'Not Found' error
@@ -479,9 +480,9 @@ module Embulk
 
         response = {status_code: e.status_code, message: e.message, error_class: e.class}
         Embulk.logger.error {
-          "embulk-output-bigquery: delete_table(#{@project}, #{dataset}, #{table}), response:#{response}"
+          "embulk-output-bigquery: delete_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
         }
-        raise Error, "failed to delete table #{@project}:#{dataset}.#{table}, response:#{response}"
+        raise Error, "failed to delete table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
       end
     end
 
@@ -497,18 +498,18 @@ module Embulk
     def get_table_or_partition(table, dataset: nil)
       begin
         dataset ||= @dataset
-        Embulk.logger.info { "embulk-output-bigquery: Get table... #{@project}:#{dataset}.#{table}" }
-        with_network_retry { client.get_table(@project, dataset, table) }
+        Embulk.logger.info { "embulk-output-bigquery: Get table... #{@destination_project}:#{dataset}.#{table}" }
+        with_network_retry { client.get_table(@destination_project, dataset, table) }
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
         if e.status_code == 404
-          raise NotFoundError, "Table #{@project}:#{dataset}.#{table} is not found"
+          raise NotFoundError, "Table #{@destination_project}:#{dataset}.#{table} is not found"
         end
 
         response = {status_code: e.status_code, message: e.message, error_class: e.class}
         Embulk.logger.error {
-          "embulk-output-bigquery: get_table(#{@project}, #{dataset}, #{table}), response:#{response}"
+          "embulk-output-bigquery: get_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
        }
-        raise Error, "failed to get table #{@project}:#{dataset}.#{table}, response:#{response}"
+        raise Error, "failed to get table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
       end
     end
   end
data/lib/embulk/output/bigquery/gcs_client.rb CHANGED
@@ -16,6 +16,7 @@ module Embulk
       super(task, scope, client_class)
 
       @project = @task['project']
+      @destination_project = @task['destination_project']
       @bucket = @task['gcs_bucket']
       @location = @task['location']
     end
@@ -23,7 +24,7 @@ module Embulk
     def insert_temporary_bucket(bucket = nil)
       bucket ||= @bucket
       begin
-        Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@project}:#{bucket}" }
+        Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@destination_project}:#{bucket}" }
         body = {
           name: bucket,
           lifecycle: {
@@ -57,7 +58,7 @@ module Embulk
         Embulk.logger.error {
           "embulk-output-bigquery: insert_temporary_bucket(#{@project}, #{body}, #{opts}), response:#{response}"
         }
-        raise Error, "failed to insert bucket #{@project}:#{bucket}, response:#{response}"
+        raise Error, "failed to insert bucket #{@destination_project}:#{bucket}, response:#{response}"
       end
     end
 
@@ -69,7 +70,7 @@ module Embulk
 
       started = Time.now
       begin
-        Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@project}:#{object_uri}" }
+        Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@destination_project}:#{object_uri}" }
         body = {
           name: object,
         }
@@ -86,7 +87,7 @@ module Embulk
         Embulk.logger.error {
           "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts}), response:#{response}"
         }
-        raise Error, "failed to insert object #{@project}:#{object_uri}, response:#{response}"
+        raise Error, "failed to insert object #{@destination_project}:#{object_uri}, response:#{response}"
       end
     end
 
@@ -109,7 +110,7 @@ module Embulk
       object = object.start_with?('/') ? object[1..-1] : object
       object_uri = URI.join("gs://#{bucket}", object).to_s
       begin
-        Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@project}:#{object_uri}" }
+        Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@destination_project}:#{object_uri}" }
         opts = {}
 
         Embulk.logger.debug { "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts})" }
@@ -122,7 +123,7 @@ module Embulk
         Embulk.logger.error {
           "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts}), response:#{response}"
         }
-        raise Error, "failed to delete object #{@project}:#{object_uri}, response:#{response}"
+        raise Error, "failed to delete object #{@destination_project}:#{object_uri}, response:#{response}"
       end
     end
   end
data/lib/embulk/output/bigquery/google_client.rb CHANGED
@@ -50,7 +50,9 @@ module Embulk
       begin
         yield
       rescue ::Java::Java.net.SocketException, ::Java::Java.net.ConnectException => e
-        if ['Broken pipe', 'Connection reset', 'Connection timed out'].include?(e.message)
+        if ['Broken pipe', 'Connection reset', 'Connection timed out'].select { |x| e.message.include?(x) }.empty?
+          raise e
+        else
           if retries < @task['retries']
             retries += 1
             Embulk.logger.warn { "embulk-output-bigquery: retry \##{retries}, #{e.class} #{e.message}" }
@@ -59,8 +61,6 @@ module Embulk
             Embulk.logger.error { "embulk-output-bigquery: retry exhausted \##{retries}, #{e.class} #{e.message}" }
             raise e
           end
-        else
-          raise e
         end
       end
     end
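This is the network retry fix from the changelog. The old guard used `Array#include?`, which tests exact string equality, so a JVM socket error whose message carries a suffix (for example `Broken pipe (Write failed)`) never matched and was re-raised instead of retried. The new guard matches substrings; its `select { ... }.empty?` test is logically the negation of `any?`. A standalone sketch of the difference:

```ruby
RETRYABLE_FRAGMENTS = ['Broken pipe', 'Connection reset', 'Connection timed out']

# Old behavior: exact equality against the whole exception message.
def retryable_old?(message)
  RETRYABLE_FRAGMENTS.include?(message)
end

# New behavior: retry when any known fragment appears anywhere in the message.
def retryable_new?(message)
  !RETRYABLE_FRAGMENTS.select { |x| message.include?(x) }.empty?
end

retryable_old?('Broken pipe (Write failed)') # => false, error was never retried
retryable_new?('Broken pipe (Write failed)') # => true, now retried
retryable_new?('Permission denied')          # => false, re-raised immediately
```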
data/test/test_bigquery_client.rb CHANGED
@@ -29,6 +29,7 @@ else
     def least_task
       {
         'project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
+        'destination_project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
         'dataset' => 'your_dataset_name',
         'table' => 'your_table_name',
         'auth_method' => 'json_key',
data/test/test_configure.rb CHANGED
@@ -45,6 +45,7 @@ module Embulk
       assert_equal "application_default", task['auth_method']
       assert_equal nil, task['json_keyfile']
       assert_equal "your_project_name", task['project']
+      assert_equal "your_project_name", task['destination_project']
       assert_equal "your_dataset_name", task['dataset']
       assert_equal nil, task['location']
       assert_equal "your_table_name", task['table']
@@ -284,6 +285,16 @@ module Embulk
       config = least_config.merge('schema_update_options' => ['FOO'])
       assert_raise { Bigquery.configure(config, schema, processor_count) }
     end
+
+    def test_destination_project
+      config = least_config.merge('destination_project' => 'your_destination_project_name')
+      task = Bigquery.configure(config, schema, processor_count)
+
+      assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+      assert_equal 'your_destination_project_name', task['destination_project']
+      assert_equal 'your_project_name', task['project']
+    end
+
     end
   end
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.6.5
+  version: 0.6.6
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-06-09 00:00:00.000000000 Z
+date: 2021-06-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: signet