embulk-output-bigquery 0.6.5 → 0.6.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 1cc922d557454c9b4e50d1bce1ad4625161c6894cbae47a392ed174ad4d2123f
- data.tar.gz: 285a811634c1a2ddad6e19389ba385910743be4ee598fa03501c6307db40ce4f
+ metadata.gz: d48b65d07302466f8f52dadb559ad049a054453db3741d4384209125e7b9e9cd
+ data.tar.gz: 13cd70568cfaebba819a9b7a9a51d1c45ff9f1599893b4a0b451e82dc84e40c9
  SHA512:
- metadata.gz: f510a2cc1ffec9b5ee2965a569b01920b4955a8a9fa784e173e67b47a7cef081dc07f4859d45e9914fe4afcec8f2aaacd4389db891546b818037560e0d9e0907
- data.tar.gz: 644008c32c635ee128cedde95918bf74d08291d026f2d9edd83d82d406772652b69b4312342f83435ef7dd4af6fa33d434647d4d1d8cea5e086317a6f3815e00
+ metadata.gz: ee51c9bf570ce2f2a55e43a5ab7842f1669d814b93cdd1395853be8f6b3ff770f6a5a6c9f9a81b6c0eca1e9bc72aff3d1302d37760fe559ecfa33a740e1da724
+ data.tar.gz: 8ada113513a089d786bf93bce1de98ad4bcc900ff73931c164a801b29e0bad9fd4b001bdf85962570998df995209e4ff320ba74e1fae22b61fb389a621121073
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+ ## 0.6.6 - 2021-06-10
+
+ * [maintenance] Fix network retry function (thanks to @case-k-git)
+ * [enhancement] Allow to specify the billing project and the project to which the data will be loaded separately (thanks to @ck-fm0211)
+ * [enhancement] Include original error message on json parse error (thanks to @k-yomo)
+
  ## 0.6.5 - 2021-06-10
  * [maintenance] Fix failed tests (thanks to @kyoshidajp)
  * [maintenance] Lock representable version for avoiding requiring Ruby 2.4 (thanks to @hiroyuki-sato)
data/README.md CHANGED
@@ -33,6 +33,7 @@ OAuth flow for installed applications.
  | auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
  | json_keyfile | string | optional | | keyfile path or `content` |
  | project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
+ | destination_project | string | optional | `project` value | A destination project to which the data will be loaded. Use this if you want to separate a billing project (the `project` value) and a destination project (the `destination_project` value). |
  | dataset | string | required | | dataset |
  | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
  | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
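For illustration, a minimal sketch of the new option in the Ruby hash form that the plugin's `configure` method receives (as exercised by the tests further below); the project names are hypothetical placeholders:

```ruby
# Hypothetical config: 'my-billing-project' pays for the load jobs,
# while tables are created and loaded in 'my-data-project'.
config = {
  'auth_method'         => 'json_key',
  'json_keyfile'        => '/path/to/keyfile.json',
  'project'             => 'my-billing-project',
  'destination_project' => 'my-data-project',
  'dataset'             => 'your_dataset_name',
  'table'               => 'your_table_name',
}
# When 'destination_project' is omitted it defaults to the 'project' value
# (task['destination_project'] ||= task['project'] in the configure hunk
# below), so existing single-project configs behave exactly as before.
```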
data/embulk-output-bigquery.gemspec CHANGED
@@ -1,6 +1,6 @@
  Gem::Specification.new do |spec|
  spec.name = "embulk-output-bigquery"
- spec.version = "0.6.5"
+ spec.version = "0.6.6"
  spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
  spec.summary = "Google BigQuery output plugin for Embulk"
  spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -16,9 +16,9 @@ Gem::Specification.new do |spec|

  # TODO
  # signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
- # Embulk 0.9 use JRuby 9.1.X.Y and It compatible Ruby 2.3.
- # So, Force install signet < 0.12 and google-api-client < 0.33.0
- # Also, representable veresion > 3.1.0 requires Ruby version >= 2.4
+ # Embulk 0.9 use JRuby 9.1.X.Y and it's compatible with Ruby 2.3.
+ # So, force install signet < 0.12 and google-api-client < 0.33.0
+ # Also, representable version >= 3.1.0 requires Ruby version >= 2.4
  spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
  spec.add_dependency 'google-api-client','< 0.33.0'
  spec.add_dependency 'time_with_zone'
data/lib/embulk/output/bigquery.rb CHANGED
@@ -36,6 +36,7 @@ module Embulk
  'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
  'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
  'project' => config.param('project', :string, :default => nil),
+ 'destination_project' => config.param('destination_project', :string, :default => nil),
  'dataset' => config.param('dataset', :string),
  'location' => config.param('location', :string, :default => nil),
  'table' => config.param('table', :string),
@@ -135,12 +136,13 @@ module Embulk
  json_key = JSON.parse(task['json_keyfile'])
  task['project'] ||= json_key['project_id']
  rescue => e
- raise ConfigError.new "json_keyfile is not a JSON file"
+ raise ConfigError.new "Parsing 'json_keyfile' failed with error: #{e.class} #{e.message}"
  end
  end
  if task['project'].nil?
  raise ConfigError.new "Required field \"project\" is not set"
  end
+ task['destination_project'] ||= task['project']

  if (task['payload_column'] or task['payload_column_index']) and task['auto_create_table']
  if task['schema_file'].nil? and task['template_table'].nil?
@@ -166,7 +168,7 @@ module Embulk
  begin
  JSON.parse(File.read(task['schema_file']))
  rescue => e
- raise ConfigError.new "schema_file #{task['schema_file']} is not a JSON file"
+ raise ConfigError.new "Parsing 'schema_file' #{task['schema_file']} failed with error: #{e.class} #{e.message}"
  end
  end
 
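The two reworked `rescue` clauses above now carry the underlying exception into the `ConfigError`. A minimal sketch of the difference, using a deliberately malformed input:

```ruby
require 'json'

begin
  JSON.parse('{ not json')   # stand-in for a corrupted json_keyfile
rescue => e
  # 0.6.5 raised a generic message: "json_keyfile is not a JSON file"
  # 0.6.6 preserves the original class and message, e.g. something like:
  #   Parsing 'json_keyfile' failed with error: JSON::ParserError
  #   unexpected token at '{ not json'
  puts "Parsing 'json_keyfile' failed with error: #{e.class} #{e.message}"
end
```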
data/lib/embulk/output/bigquery/bigquery_client.rb CHANGED
@@ -18,6 +18,7 @@ module Embulk
  @schema = schema
  reset_fields(fields) if fields
  @project = @task['project']
+ @destination_project = @task['destination_project']
  @dataset = @task['dataset']
  @location = @task['location']
  @location_for_log = @location.nil? ? 'us/eu' : @location
@@ -80,7 +81,7 @@ module Embulk
  # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
  # we should generate job_id in client code, otherwise, retrying would cause duplication
  job_id = "embulk_load_job_#{SecureRandom.uuid}"
- Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
+ Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }

  body = {
  job_reference: {
@@ -90,7 +91,7 @@ module Embulk
  configuration: {
  load: {
  destination_table: {
- project_id: @project,
+ project_id: @destination_project,
  dataset_id: @dataset,
  table_id: table,
  },
@@ -130,7 +131,7 @@ module Embulk
  Embulk.logger.error {
  "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
  }
- raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
+ raise Error, "failed to load #{object_uris} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
  end
  end
  end
@@ -171,7 +172,7 @@ module Embulk
  # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
  # we should generate job_id in client code, otherwise, retrying would cause duplication
  job_id = "embulk_load_job_#{SecureRandom.uuid}"
- Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
+ Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }
  else
  Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
  return
@@ -185,7 +186,7 @@ module Embulk
  configuration: {
  load: {
  destination_table: {
- project_id: @project,
+ project_id: @destination_project,
  dataset_id: @dataset,
  table_id: table,
  },
@@ -232,7 +233,7 @@ module Embulk
  Embulk.logger.error {
  "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
  }
- raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
+ raise Error, "failed to load #{path} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
  end
  end
  end
@@ -245,7 +246,7 @@ module Embulk

  Embulk.logger.info {
  "embulk-output-bigquery: Copy job starting... job_id:[#{job_id}] " \
- "#{@project}:#{@dataset}.#{source_table} => #{@project}:#{destination_dataset}.#{destination_table}"
+ "#{@destination_project}:#{@dataset}.#{source_table} => #{@destination_project}:#{destination_dataset}.#{destination_table}"
  }

  body = {
@@ -258,12 +259,12 @@ module Embulk
  create_deposition: 'CREATE_IF_NEEDED',
  write_disposition: write_disposition,
  source_table: {
- project_id: @project,
+ project_id: @destination_project,
  dataset_id: @dataset,
  table_id: source_table,
  },
  destination_table: {
- project_id: @project,
+ project_id: @destination_project,
  dataset_id: destination_dataset,
  table_id: destination_table,
  },
@@ -284,8 +285,8 @@ module Embulk
  Embulk.logger.error {
  "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
  }
- raise Error, "failed to copy #{@project}:#{@dataset}.#{source_table} " \
- "to #{@project}:#{destination_dataset}.#{destination_table}, response:#{response}"
+ raise Error, "failed to copy #{@destination_project}:#{@dataset}.#{source_table} " \
+ "to #{@destination_project}:#{destination_dataset}.#{destination_table}, response:#{response}"
  end
  end
  end
@@ -354,7 +355,7 @@ module Embulk
  def create_dataset(dataset = nil, reference: nil)
  dataset ||= @dataset
  begin
- Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset} in #{@location_for_log}" }
+ Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@destination_project}:#{dataset} in #{@location_for_log}" }
  hint = {}
  if reference
  response = get_dataset(reference)
@@ -382,25 +383,25 @@ module Embulk
  Embulk.logger.error {
  "embulk-output-bigquery: insert_dataset(#{@project}, #{body}, #{opts}), response:#{response}"
  }
- raise Error, "failed to create dataset #{@project}:#{dataset} in #{@location_for_log}, response:#{response}"
+ raise Error, "failed to create dataset #{@destination_project}:#{dataset} in #{@location_for_log}, response:#{response}"
  end
  end

  def get_dataset(dataset = nil)
  dataset ||= @dataset
  begin
- Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{dataset}" }
- with_network_retry { client.get_dataset(@project, dataset) }
+ Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@destination_project}:#{dataset}" }
+ with_network_retry { client.get_dataset(@destination_project, dataset) }
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
  if e.status_code == 404
- raise NotFoundError, "Dataset #{@project}:#{dataset} is not found"
+ raise NotFoundError, "Dataset #{@destination_project}:#{dataset} is not found"
  end

  response = {status_code: e.status_code, message: e.message, error_class: e.class}
  Embulk.logger.error {
- "embulk-output-bigquery: get_dataset(#{@project}, #{dataset}), response:#{response}"
+ "embulk-output-bigquery: get_dataset(#{@destination_project}, #{dataset}), response:#{response}"
  }
- raise Error, "failed to get dataset #{@project}:#{dataset}, response:#{response}"
+ raise Error, "failed to get dataset #{@destination_project}:#{dataset}, response:#{response}"
  end
  end

@@ -414,7 +415,7 @@ module Embulk
  table = Helper.chomp_partition_decorator(table)
  end

- Embulk.logger.info { "embulk-output-bigquery: Create table... #{@project}:#{dataset}.#{table}" }
+ Embulk.logger.info { "embulk-output-bigquery: Create table... #{@destination_project}:#{dataset}.#{table}" }
  body = {
  table_reference: {
  table_id: table,
@@ -452,7 +453,7 @@ module Embulk
  Embulk.logger.error {
  "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
  }
- raise Error, "failed to create table #{@project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
+ raise Error, "failed to create table #{@destination_project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
  end
  end

@@ -469,8 +470,8 @@ module Embulk
  def delete_table_or_partition(table, dataset: nil)
  begin
  dataset ||= @dataset
- Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@project}:#{dataset}.#{table}" }
- with_network_retry { client.delete_table(@project, dataset, table) }
+ Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@destination_project}:#{dataset}.#{table}" }
+ with_network_retry { client.delete_table(@destination_project, dataset, table) }
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
  if e.status_code == 404 && /Not found:/ =~ e.message
  # ignore 'Not Found' error
@@ -479,9 +480,9 @@ module Embulk

  response = {status_code: e.status_code, message: e.message, error_class: e.class}
  Embulk.logger.error {
- "embulk-output-bigquery: delete_table(#{@project}, #{dataset}, #{table}), response:#{response}"
+ "embulk-output-bigquery: delete_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
  }
- raise Error, "failed to delete table #{@project}:#{dataset}.#{table}, response:#{response}"
+ raise Error, "failed to delete table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
  end
  end

@@ -497,18 +498,18 @@ module Embulk
  def get_table_or_partition(table, dataset: nil)
  begin
  dataset ||= @dataset
- Embulk.logger.info { "embulk-output-bigquery: Get table... #{@project}:#{dataset}.#{table}" }
- with_network_retry { client.get_table(@project, dataset, table) }
+ Embulk.logger.info { "embulk-output-bigquery: Get table... #{@destination_project}:#{dataset}.#{table}" }
+ with_network_retry { client.get_table(@destination_project, dataset, table) }
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
  if e.status_code == 404
- raise NotFoundError, "Table #{@project}:#{dataset}.#{table} is not found"
+ raise NotFoundError, "Table #{@destination_project}:#{dataset}.#{table} is not found"
  end

  response = {status_code: e.status_code, message: e.message, error_class: e.class}
  Embulk.logger.error {
- "embulk-output-bigquery: get_table(#{@project}, #{dataset}, #{table}), response:#{response}"
+ "embulk-output-bigquery: get_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
  }
- raise Error, "failed to get table #{@project}:#{dataset}.#{table}, response:#{response}"
+ raise Error, "failed to get table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
  end
  end
  end
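One detail worth noting in the hunks above: only the table and dataset references move to `@destination_project`; the `insert_job(#{@project}, ...)` log lines show that jobs are still inserted under, and billed to, `@project`. A condensed sketch of that split (placeholder values, structure as in the diff):

```ruby
# 'my-billing-project' runs and pays for the job; the loaded table
# lives in 'my-data-project'.
project             = 'my-billing-project'   # @project
destination_project = 'my-data-project'      # @destination_project

body = {
  configuration: {
    load: {
      destination_table: {
        project_id: destination_project,     # was @project before 0.6.6
        dataset_id: 'your_dataset_name',
        table_id:   'your_table_name',
      },
    },
  },
}
# The job itself is still inserted with the billing project as the first
# argument, unchanged in this release, roughly:
#   with_network_retry { client.insert_job(project, body, opts) }
```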
data/lib/embulk/output/bigquery/gcs_client.rb CHANGED
@@ -16,6 +16,7 @@ module Embulk
  super(task, scope, client_class)

  @project = @task['project']
+ @destination_project = @task['destination_project']
  @bucket = @task['gcs_bucket']
  @location = @task['location']
  end
@@ -23,7 +24,7 @@ module Embulk
  def insert_temporary_bucket(bucket = nil)
  bucket ||= @bucket
  begin
- Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@project}:#{bucket}" }
+ Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@destination_project}:#{bucket}" }
  body = {
  name: bucket,
  lifecycle: {
@@ -57,7 +58,7 @@ module Embulk
  Embulk.logger.error {
  "embulk-output-bigquery: insert_temporary_bucket(#{@project}, #{body}, #{opts}), response:#{response}"
  }
- raise Error, "failed to insert bucket #{@project}:#{bucket}, response:#{response}"
+ raise Error, "failed to insert bucket #{@destination_project}:#{bucket}, response:#{response}"
  end
  end

@@ -69,7 +70,7 @@ module Embulk

  started = Time.now
  begin
- Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@project}:#{object_uri}" }
+ Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@destination_project}:#{object_uri}" }
  body = {
  name: object,
  }
@@ -86,7 +87,7 @@ module Embulk
  Embulk.logger.error {
  "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts}), response:#{response}"
  }
- raise Error, "failed to insert object #{@project}:#{object_uri}, response:#{response}"
+ raise Error, "failed to insert object #{@destination_project}:#{object_uri}, response:#{response}"
  end
  end

@@ -109,7 +110,7 @@ module Embulk
  object = object.start_with?('/') ? object[1..-1] : object
  object_uri = URI.join("gs://#{bucket}", object).to_s
  begin
- Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@project}:#{object_uri}" }
+ Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@destination_project}:#{object_uri}" }
  opts = {}

  Embulk.logger.debug { "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts})" }
@@ -122,7 +123,7 @@ module Embulk
  Embulk.logger.error {
  "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts}), response:#{response}"
  }
- raise Error, "failed to delete object #{@project}:#{object_uri}, response:#{response}"
+ raise Error, "failed to delete object #{@destination_project}:#{object_uri}, response:#{response}"
  end
  end
  end
data/lib/embulk/output/bigquery/google_client.rb CHANGED
@@ -50,7 +50,9 @@ module Embulk
  begin
  yield
  rescue ::Java::Java.net.SocketException, ::Java::Java.net.ConnectException => e
- if ['Broken pipe', 'Connection reset', 'Connection timed out'].include?(e.message)
+ if ['Broken pipe', 'Connection reset', 'Connection timed out'].select { |x| e.message.include?(x) }.empty?
+ raise e
+ else
  if retries < @task['retries']
  retries += 1
  Embulk.logger.warn { "embulk-output-bigquery: retry \##{retries}, #{e.class} #{e.message}" }
@@ -59,8 +61,6 @@ module Embulk
  Embulk.logger.error { "embulk-output-bigquery: retry exhausted \##{retries}, #{e.class} #{e.message}" }
  raise e
  end
- else
- raise e
  end
  end
  end
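The rewritten condition above is the retry fix from the changelog: 0.6.5 compared the full exception message against the allow-list with `include?`, an exact match, so a real message carrying extra detail never matched and was re-raised without any retry; 0.6.6 matches by substring and re-raises only when no entry matches. A minimal sketch, assuming a hypothetical message of the shape JRuby socket exceptions typically produce:

```ruby
RETRYABLE = ['Broken pipe', 'Connection reset', 'Connection timed out']

msg = 'Broken pipe (Write failed)'   # hypothetical example message

# 0.6.5: exact match against the whole message -- never true here.
RETRYABLE.include?(msg)                          # => false (error re-raised, no retry)

# 0.6.6: substring match -- an empty result means "not retryable".
RETRYABLE.select { |x| msg.include?(x) }.empty?  # => false (retry branch is taken)
```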
data/test/test_bigquery_client.rb CHANGED
@@ -29,6 +29,7 @@ else
  def least_task
  {
  'project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
+ 'destination_project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
  'dataset' => 'your_dataset_name',
  'table' => 'your_table_name',
  'auth_method' => 'json_key',
data/test/test_configure.rb CHANGED
@@ -45,6 +45,7 @@ module Embulk
  assert_equal "application_default", task['auth_method']
  assert_equal nil, task['json_keyfile']
  assert_equal "your_project_name", task['project']
+ assert_equal "your_project_name", task['destination_project']
  assert_equal "your_dataset_name", task['dataset']
  assert_equal nil, task['location']
  assert_equal "your_table_name", task['table']
@@ -284,6 +285,16 @@
  config = least_config.merge('schema_update_options' => ['FOO'])
  assert_raise { Bigquery.configure(config, schema, processor_count) }
  end
+
+ def test_destination_project
+ config = least_config.merge('destination_project' => 'your_destination_project_name')
+ task = Bigquery.configure(config, schema, processor_count)
+
+ assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+ assert_equal 'your_destination_project_name', task['destination_project']
+ assert_equal 'your_project_name', task['project']
+ end
+
  end
  end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: embulk-output-bigquery
  version: !ruby/object:Gem::Version
- version: 0.6.5
+ version: 0.6.6
  platform: ruby
  authors:
  - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-06-09 00:00:00.000000000 Z
+ date: 2021-06-10 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: signet