embulk-output-bigquery 0.6.3 → 0.6.7

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 8b3d7d7d675d8428946f81517d1002f667f4fafe
-  data.tar.gz: 25940b93f70492675869d3c4dd50f83f8b7347cf
+SHA256:
+  metadata.gz: 5ad08405eb0d6f4a7ff867c6252d393628437d55e03fe33ec2cc71f940926153
+  data.tar.gz: 431ce1cf5298d5ec114a66191c9847f34fd554cc5de7c12fca8950a9e59538bf
 SHA512:
-  metadata.gz: 97a2aff66c765f24289717ac79e0a25a6bf31ee3ec5b84b64c96e8573382b31b0a27c30f06692a296b3bfedd70ea9f34f1a451cea7de27d3fa4c61a7502bab98
-  data.tar.gz: b795d47af337e109dfafb9f41a0a720d0eb314c7ba7219193648505ec9dffa3874215b5d311256f625228a4f3e52b73153ee3d694a3d2f88d4c2fd0dd24960b1
+  metadata.gz: c23e79e1b7e6d7b2af8455e97fd58bb4c4bc5e7b7fb1809bd49642294adfe644c27beca55177cff8427f5136532accae3973db1b000660c96f532e6d1cbfbc09
+  data.tar.gz: 2d6ee9d0cf3504683bc2898df3edc246401a2cfd44db2f6311368903e89e09e9b31e746232005fdbbf43f039b91ca1fe1b4f190704ceee3e7a9778962de5be1b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,20 @@
+## 0.6.7 - 2021-09-10
+* [enhancement] Add an expiration option to the temporary table for clean-up (thanks to @TKNGUE)
+
+## 0.6.6 - 2021-06-10
+
+* [maintenance] Fix the network retry function (thanks to @case-k-git)
+* [enhancement] Allow specifying the billing project and the project to which the data will be loaded separately (thanks to @ck-fm0211)
+* [enhancement] Include the original error message on JSON parse errors (thanks to @k-yomo)
+
+## 0.6.5 - 2021-06-10
+* [maintenance] Fix failed tests (thanks to @kyoshidajp)
+* [maintenance] Lock the representable version to avoid requiring Ruby 2.4 (thanks to @hiroyuki-sato)
+
+## 0.6.4 - 2019-11-06
+
+* [enhancement] Add a DATETIME type converter (thanks to @kekekenta)
+
 ## 0.6.3 - 2019-10-28
 
 * [enhancement] Add a DATE type converter (thanks to @tksfjt1024)
data/Gemfile CHANGED
@@ -1,7 +1,7 @@
 source 'https://rubygems.org/'
 
 gemspec
-gem 'embulk'
+gem 'embulk', '< 0.10'
 gem 'liquid', '= 4.0.0' # the version included in embulk.jar
 gem 'embulk-parser-none'
 gem 'embulk-parser-jsonl'
data/README.md CHANGED
@@ -33,6 +33,7 @@ OAuth flow for installed applications.
 | auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
 | json_keyfile | string | optional | | keyfile path or `content` |
 | project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
+| destination_project | string | optional | `project` value | The project to which the data will be loaded. Use this if you want to separate the billing project (the `project` value) from the destination project (the `destination_project` value). |
 | dataset | string | required | | dataset |
 | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929` |
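For illustration, a minimal config sketch using the new option (the project, dataset, and table names below are placeholders, not values from this diff):

```yaml
out:
  type: bigquery
  auth_method: json_key
  json_keyfile: /path/to/keyfile.json
  project: my-billing-project            # billed for the load/copy jobs
  destination_project: my-data-project   # receives the loaded tables
  dataset: my_dataset
  table: my_table
```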
@@ -78,6 +79,13 @@ Options for intermediate local files
 | delete_from_local_when_job_end | boolean | optional | true | If set to true, delete generated local files when the job ends |
 | compression | string | optional | "NONE" | Compression of local files (`GZIP` or `NONE`) |
 
+
+Options for intermediate tables on BigQuery
+
+| name | type | required? | default | description |
+|:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
+| temporary_table_expiration | integer | optional | | Temporary table's expiration time in seconds |
+
 `source_format` is also used to determine formatter (csv or jsonl).
 
 #### Same options of bq command-line tools or BigQuery job's property
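As later hunks in this diff show (the `case task['mode']` block), the new `temporary_table_expiration` option is applied to the intermediate tables created in `replace`, `append`, and `replace_backup` modes, so BigQuery cleans them up even if a job dies before its own delete step. A hedged sketch with an illustrative one-hour expiration:

```yaml
out:
  type: bigquery
  mode: replace
  project: my-project
  dataset: my_dataset
  table: my_table
  temporary_table_expiration: 3600   # seconds; converted internally to an absolute expiration_time in milliseconds
```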
@@ -307,12 +315,12 @@ Column options are used to aid guessing BigQuery schema, or to define conversion
 
 - **column_options**: advanced: an array of options for columns
   - **name**: column name
-  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE`, and `RECORD`. See below for supported conversion types.
+  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, and `RECORD`. See below for supported conversion types.
     - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
     - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
     - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
-    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE`, `RECORD` (default: `STRING`)
-    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE` (default: `TIMESTAMP`)
+    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, `RECORD` (default: `STRING`)
+    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE` (default: `TIMESTAMP`)
     - json: `STRING`, `RECORD` (default: `STRING`)
   - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
   - **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
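A sketch of the new `DATETIME` conversion in `column_options` (column names here are hypothetical). For string columns, `timestamp_format` controls parsing, mirroring the value-converter tests further down this diff:

```yaml
out:
  type: bigquery
  column_options:
    - {name: created_on, type: DATETIME, timestamp_format: '%Y/%m/%d'}
    - {name: updated_at, type: DATETIME}   # timestamp input is rendered as "%Y-%m-%d %H:%M:%S.%6N"
```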
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.6.3"
+  spec.version = "0.6.7"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -16,11 +16,16 @@ Gem::Specification.new do |spec|
 
   # TODO
   # signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
-  # Embulk 0.9 use JRuby 9.1.X.Y and It compatible Ruby 2.3.
-  # So, Force install signet < 0.12 and google-api-client < 0.33.0
+  # Embulk 0.9 use JRuby 9.1.X.Y and it's compatible with Ruby 2.3.
+  # So, force install signet < 0.12 and google-api-client < 0.33.0
+  # Also, representable version >= 3.1.0 requires Ruby version >= 2.4
   spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
   spec.add_dependency 'google-api-client','< 0.33.0'
   spec.add_dependency 'time_with_zone'
+  spec.add_dependency "representable", ['~> 3.0.0', '< 3.1']
+  # faraday 1.1.0 require >= Ruby 2.4.
+  # googleauth 0.9.0 requires faraday ~> 0.12
+  spec.add_dependency "faraday", '~> 0.12'
 
   spec.add_development_dependency 'bundler', ['>= 1.10.6']
   spec.add_development_dependency 'rake', ['>= 10.0']
@@ -18,6 +18,7 @@ module Embulk
        @schema = schema
        reset_fields(fields) if fields
        @project = @task['project']
+       @destination_project = @task['destination_project']
        @dataset = @task['dataset']
        @location = @task['location']
        @location_for_log = @location.nil? ? 'us/eu' : @location
@@ -80,7 +81,7 @@ module Embulk
        # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
        # we should generate job_id in client code, otherwise, retrying would cause duplication
        job_id = "embulk_load_job_#{SecureRandom.uuid}"
-       Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
+       Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }
 
        body = {
          job_reference: {
@@ -90,7 +91,7 @@ module Embulk
          configuration: {
            load: {
              destination_table: {
-               project_id: @project,
+               project_id: @destination_project,
                dataset_id: @dataset,
                table_id: table,
              },
@@ -130,7 +131,7 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
+         raise Error, "failed to load #{object_uris} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
        end
      end
    end
@@ -171,7 +172,7 @@ module Embulk
          # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
          # we should generate job_id in client code, otherwise, retrying would cause duplication
          job_id = "embulk_load_job_#{SecureRandom.uuid}"
-         Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@project}:#{@dataset}.#{table} in #{@location_for_log}" }
+         Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{path} => #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}" }
        else
          Embulk.logger.info { "embulk-output-bigquery: Load job starting... #{path} does not exist, skipped" }
          return
@@ -185,7 +186,7 @@ module Embulk
          configuration: {
            load: {
              destination_table: {
-               project_id: @project,
+               project_id: @destination_project,
                dataset_id: @dataset,
                table_id: table,
              },
@@ -232,7 +233,7 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to load #{path} to #{@project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
+         raise Error, "failed to load #{path} to #{@destination_project}:#{@dataset}.#{table} in #{@location_for_log}, response:#{response}"
        end
      end
    end
@@ -245,7 +246,7 @@ module Embulk
 
        Embulk.logger.info {
          "embulk-output-bigquery: Copy job starting... job_id:[#{job_id}] " \
-         "#{@project}:#{@dataset}.#{source_table} => #{@project}:#{destination_dataset}.#{destination_table}"
+         "#{@destination_project}:#{@dataset}.#{source_table} => #{@destination_project}:#{destination_dataset}.#{destination_table}"
        }
 
        body = {
@@ -258,12 +259,12 @@ module Embulk
            create_deposition: 'CREATE_IF_NEEDED',
            write_disposition: write_disposition,
            source_table: {
-             project_id: @project,
+             project_id: @destination_project,
              dataset_id: @dataset,
              table_id: source_table,
            },
            destination_table: {
-             project_id: @project,
+             project_id: @destination_project,
              dataset_id: destination_dataset,
              table_id: destination_table,
            },
@@ -284,8 +285,8 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to copy #{@project}:#{@dataset}.#{source_table} " \
-                      "to #{@project}:#{destination_dataset}.#{destination_table}, response:#{response}"
+         raise Error, "failed to copy #{@destination_project}:#{@dataset}.#{source_table} " \
+                      "to #{@destination_project}:#{destination_dataset}.#{destination_table}, response:#{response}"
        end
      end
    end
@@ -354,7 +355,7 @@ module Embulk
      def create_dataset(dataset = nil, reference: nil)
        dataset ||= @dataset
        begin
-         Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@project}:#{dataset} in #{@location_for_log}" }
+         Embulk.logger.info { "embulk-output-bigquery: Create dataset... #{@destination_project}:#{dataset} in #{@location_for_log}" }
          hint = {}
          if reference
            response = get_dataset(reference)
@@ -382,25 +383,25 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_dataset(#{@project}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to create dataset #{@project}:#{dataset} in #{@location_for_log}, response:#{response}"
+         raise Error, "failed to create dataset #{@destination_project}:#{dataset} in #{@location_for_log}, response:#{response}"
        end
      end
 
      def get_dataset(dataset = nil)
        dataset ||= @dataset
        begin
-         Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@project}:#{dataset}" }
-         with_network_retry { client.get_dataset(@project, dataset) }
+         Embulk.logger.info { "embulk-output-bigquery: Get dataset... #{@destination_project}:#{dataset}" }
+         with_network_retry { client.get_dataset(@destination_project, dataset) }
        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
          if e.status_code == 404
-           raise NotFoundError, "Dataset #{@project}:#{dataset} is not found"
+           raise NotFoundError, "Dataset #{@destination_project}:#{dataset} is not found"
          end
 
          response = {status_code: e.status_code, message: e.message, error_class: e.class}
          Embulk.logger.error {
-           "embulk-output-bigquery: get_dataset(#{@project}, #{dataset}), response:#{response}"
+           "embulk-output-bigquery: get_dataset(#{@destination_project}, #{dataset}), response:#{response}"
          }
-         raise Error, "failed to get dataset #{@project}:#{dataset}, response:#{response}"
+         raise Error, "failed to get dataset #{@destination_project}:#{dataset}, response:#{response}"
        end
      end
 
@@ -414,7 +415,7 @@ module Embulk
          table = Helper.chomp_partition_decorator(table)
        end
 
-       Embulk.logger.info { "embulk-output-bigquery: Create table... #{@project}:#{dataset}.#{table}" }
+       Embulk.logger.info { "embulk-output-bigquery: Create table... #{@destination_project}:#{dataset}.#{table}" }
        body = {
          table_reference: {
            table_id: table,
@@ -439,6 +440,11 @@ module Embulk
          }
        end
 
+       if options['expiration_time']
+         # expiration_time is expressed in milliseconds
+         body[:expiration_time] = (Time.now.to_i + options['expiration_time']) * 1000
+       end
+
        opts = {}
        Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
        with_network_retry { client.insert_table(@project, dataset, body, opts) }
@@ -452,7 +458,7 @@ module Embulk
        Embulk.logger.error {
          "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts}), response:#{response}"
        }
-       raise Error, "failed to create table #{@project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
+       raise Error, "failed to create table #{@destination_project}:#{dataset}.#{table} in #{@location_for_log}, response:#{response}"
      end
    end
 
@@ -469,8 +475,8 @@ module Embulk
      def delete_table_or_partition(table, dataset: nil)
        begin
          dataset ||= @dataset
-         Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@project}:#{dataset}.#{table}" }
-         with_network_retry { client.delete_table(@project, dataset, table) }
+         Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@destination_project}:#{dataset}.#{table}" }
+         with_network_retry { client.delete_table(@destination_project, dataset, table) }
        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
          if e.status_code == 404 && /Not found:/ =~ e.message
            # ignore 'Not Found' error
@@ -479,9 +485,9 @@ module Embulk
 
          response = {status_code: e.status_code, message: e.message, error_class: e.class}
          Embulk.logger.error {
-           "embulk-output-bigquery: delete_table(#{@project}, #{dataset}, #{table}), response:#{response}"
+           "embulk-output-bigquery: delete_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
          }
-         raise Error, "failed to delete table #{@project}:#{dataset}.#{table}, response:#{response}"
+         raise Error, "failed to delete table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
        end
      end
 
@@ -497,18 +503,18 @@ module Embulk
      def get_table_or_partition(table, dataset: nil)
        begin
          dataset ||= @dataset
-         Embulk.logger.info { "embulk-output-bigquery: Get table... #{@project}:#{dataset}.#{table}" }
-         with_network_retry { client.get_table(@project, dataset, table) }
+         Embulk.logger.info { "embulk-output-bigquery: Get table... #{@destination_project}:#{dataset}.#{table}" }
+         with_network_retry { client.get_table(@destination_project, dataset, table) }
        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
          if e.status_code == 404
-           raise NotFoundError, "Table #{@project}:#{dataset}.#{table} is not found"
+           raise NotFoundError, "Table #{@destination_project}:#{dataset}.#{table} is not found"
          end
 
          response = {status_code: e.status_code, message: e.message, error_class: e.class}
          Embulk.logger.error {
-           "embulk-output-bigquery: get_table(#{@project}, #{dataset}, #{table}), response:#{response}"
+           "embulk-output-bigquery: get_table(#{@destination_project}, #{dataset}, #{table}), response:#{response}"
          }
-         raise Error, "failed to get table #{@project}:#{dataset}.#{table}, response:#{response}"
+         raise Error, "failed to get table #{@destination_project}:#{dataset}.#{table}, response:#{response}"
        end
      end
    end
@@ -16,6 +16,7 @@ module Embulk
        super(task, scope, client_class)
 
        @project = @task['project']
+       @destination_project = @task['destination_project']
        @bucket = @task['gcs_bucket']
        @location = @task['location']
      end
@@ -23,7 +24,7 @@ module Embulk
      def insert_temporary_bucket(bucket = nil)
        bucket ||= @bucket
        begin
-         Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@project}:#{bucket}" }
+         Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@destination_project}:#{bucket}" }
          body = {
            name: bucket,
            lifecycle: {
@@ -57,7 +58,7 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_temporary_bucket(#{@project}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to insert bucket #{@project}:#{bucket}, response:#{response}"
+         raise Error, "failed to insert bucket #{@destination_project}:#{bucket}, response:#{response}"
        end
      end
 
@@ -69,7 +70,7 @@ module Embulk
 
        started = Time.now
        begin
-         Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@project}:#{object_uri}" }
+         Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@destination_project}:#{object_uri}" }
          body = {
            name: object,
          }
@@ -86,7 +87,7 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to insert object #{@project}:#{object_uri}, response:#{response}"
+         raise Error, "failed to insert object #{@destination_project}:#{object_uri}, response:#{response}"
        end
      end
 
@@ -109,7 +110,7 @@ module Embulk
        object = object.start_with?('/') ? object[1..-1] : object
        object_uri = URI.join("gs://#{bucket}", object).to_s
        begin
-         Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@project}:#{object_uri}" }
+         Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@destination_project}:#{object_uri}" }
          opts = {}
 
          Embulk.logger.debug { "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts})" }
@@ -122,7 +123,7 @@ module Embulk
          Embulk.logger.error {
            "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts}), response:#{response}"
          }
-         raise Error, "failed to delete object #{@project}:#{object_uri}, response:#{response}"
+         raise Error, "failed to delete object #{@destination_project}:#{object_uri}, response:#{response}"
        end
      end
    end
@@ -50,7 +50,9 @@ module Embulk
        begin
          yield
        rescue ::Java::Java.net.SocketException, ::Java::Java.net.ConnectException => e
-         if ['Broken pipe', 'Connection reset', 'Connection timed out'].include?(e.message)
+         if ['Broken pipe', 'Connection reset', 'Connection timed out'].select { |x| e.message.include?(x) }.empty?
+           raise e
+         else
            if retries < @task['retries']
              retries += 1
              Embulk.logger.warn { "embulk-output-bigquery: retry \##{retries}, #{e.class} #{e.message}" }
@@ -59,8 +61,6 @@ module Embulk
              Embulk.logger.error { "embulk-output-bigquery: retry exhausted \##{retries}, #{e.class} #{e.message}" }
              raise e
            end
-         else
-           raise e
          end
        end
      end
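The hunk above fixes the retry guard: it previously retried only when the exception message exactly equaled one of the listed strings, while real JRuby socket errors often carry longer messages (e.g. "Broken pipe (Write failed)"), so they escaped retry. Matching by substring, and raising immediately only when nothing matches, restores the intended behavior. The retry budget still comes from the plugin's existing `retries` option; a hedged config sketch:

```yaml
out:
  type: bigquery
  retries: 5   # retry count consulted by with_network_retry; 5 is the plugin's documented default
```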
@@ -210,6 +210,20 @@ module Embulk
            TimeWithZone.set_zone_offset(Time.parse(val), zone_offset).strftime("%Y-%m-%d")
          end
        }
+     when 'DATETIME'
+       if @timestamp_format
+         Proc.new {|val|
+           next nil if val.nil?
+           with_typecast_error(val) do |val|
+             Time.strptime(val, @timestamp_format).strftime("%Y-%m-%d %H:%M:%S.%6N")
+           end
+         }
+       else
+         Proc.new {|val|
+           next nil if val.nil?
+           val # Users must take care of the BQ datetime format themselves
+         }
+       end
      when 'RECORD'
        Proc.new {|val|
          next nil if val.nil?
@@ -252,6 +266,11 @@ module Embulk
          next nil if val.nil?
          val.localtime(zone_offset).strftime("%Y-%m-%d")
        }
+     when 'DATETIME'
+       Proc.new {|val|
+         next nil if val.nil?
+         val.localtime(zone_offset).strftime("%Y-%m-%d %H:%M:%S.%6N")
+       }
      else
        raise NotSupportedType, "cannot take column type #{type} for timestamp column"
      end
@@ -36,6 +36,7 @@ module Embulk
          'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
          'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
          'project' => config.param('project', :string, :default => nil),
+         'destination_project' => config.param('destination_project', :string, :default => nil),
          'dataset' => config.param('dataset', :string),
          'location' => config.param('location', :string, :default => nil),
          'table' => config.param('table', :string),
@@ -89,6 +90,8 @@ module Embulk
          'clustering' => config.param('clustering', :hash, :default => nil), # google-api-ruby-client >= v0.21.0
          'schema_update_options' => config.param('schema_update_options', :array, :default => nil),
 
+         'temporary_table_expiration' => config.param('temporary_table_expiration', :integer, :default => nil),
+
          # for debug
          'skip_load' => config.param('skip_load', :bool, :default => false),
          'temp_table' => config.param('temp_table', :string, :default => nil),
@@ -135,12 +138,13 @@ module Embulk
            json_key = JSON.parse(task['json_keyfile'])
            task['project'] ||= json_key['project_id']
          rescue => e
-           raise ConfigError.new "json_keyfile is not a JSON file"
+           raise ConfigError.new "Parsing 'json_keyfile' failed with error: #{e.class} #{e.message}"
          end
        end
        if task['project'].nil?
          raise ConfigError.new "Required field \"project\" is not set"
        end
+       task['destination_project'] ||= task['project']
 
        if (task['payload_column'] or task['payload_column_index']) and task['auto_create_table']
          if task['schema_file'].nil? and task['template_table'].nil?
@@ -166,7 +170,7 @@ module Embulk
          begin
            JSON.parse(File.read(task['schema_file']))
          rescue => e
-           raise ConfigError.new "schema_file #{task['schema_file']} is not a JSON file"
+           raise ConfigError.new "Parsing 'schema_file' #{task['schema_file']} failed with error: #{e.class} #{e.message}"
          end
        end
 
@@ -298,19 +302,23 @@ module Embulk
          end
        end
 
+       temp_table_expiration = task['temporary_table_expiration']
+       temp_options = {'expiration_time' => temp_table_expiration}
+
        case task['mode']
        when 'delete_in_advance'
          bigquery.delete_table_or_partition(task['table'])
          bigquery.create_table_if_not_exists(task['table'])
        when 'replace'
-         bigquery.create_table_if_not_exists(task['temp_table'])
+         bigquery.create_table_if_not_exists(task['temp_table'], options: temp_options)
          bigquery.create_table_if_not_exists(task['table']) # needed when task['table'] is a partition
        when 'append'
-         bigquery.create_table_if_not_exists(task['temp_table'])
+         bigquery.create_table_if_not_exists(task['temp_table'], options: temp_options)
          bigquery.create_table_if_not_exists(task['table']) # needed when task['table'] is a partition
        when 'replace_backup'
-         bigquery.create_table_if_not_exists(task['temp_table'])
+         bigquery.create_table_if_not_exists(task['temp_table'], options: temp_options)
          bigquery.create_table_if_not_exists(task['table'])
+
          bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old']) # needed when a partition
        else # append_direct
          if task['auto_create_table']
@@ -29,6 +29,7 @@ else
    def least_task
      {
        'project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
+       'destination_project' => JSON.parse(File.read(JSON_KEYFILE))['project_id'],
        'dataset' => 'your_dataset_name',
        'table' => 'your_table_name',
        'auth_method' => 'json_key',
@@ -45,6 +45,7 @@ module Embulk
      assert_equal "application_default", task['auth_method']
      assert_equal nil, task['json_keyfile']
      assert_equal "your_project_name", task['project']
+     assert_equal "your_project_name", task['destination_project']
      assert_equal "your_dataset_name", task['dataset']
      assert_equal nil, task['location']
      assert_equal "your_table_name", task['table']
@@ -284,6 +285,16 @@ module Embulk
        config = least_config.merge('schema_update_options' => ['FOO'])
        assert_raise { Bigquery.configure(config, schema, processor_count) }
      end
+
+     def test_destination_project
+       config = least_config.merge('destination_project' => 'your_destination_project_name')
+       task = Bigquery.configure(config, schema, processor_count)
+
+       assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+       assert_equal 'your_destination_project_name', task['destination_project']
+       assert_equal 'your_project_name', task['project']
+     end
+
    end
  end
 end
data/test/test_helper.rb CHANGED
@@ -63,7 +63,8 @@ module Embulk
        Column.new({index: 3, name: 'string', type: :string}),
        Column.new({index: 4, name: 'timestamp', type: :timestamp}),
        Column.new({index: 5, name: 'date', type: :timestamp}),
-       Column.new({index: 6, name: 'json', type: :json}),
+       Column.new({index: 6, name: 'datetime', type: :timestamp}),
+       Column.new({index: 7, name: 'json', type: :json}),
      ])
      task = {
        'column_options' => [
@@ -73,6 +74,7 @@ module Embulk
          {'name' => 'string', 'type' => 'INTEGER'},
          {'name' => 'timestamp', 'type' => 'INTEGER'},
          {'name' => 'date', 'type' => 'DATE'},
+         {'name' => 'datetime', 'type' => 'DATETIME'},
          {'name' => 'json', 'type' => 'RECORD', 'fields' => [
            { 'name' => 'key1', 'type' => 'STRING' },
          ]},
@@ -85,6 +87,7 @@ module Embulk
        {name: 'string', type: 'INTEGER'},
        {name: 'timestamp', type: 'INTEGER'},
        {name: 'date', type: 'DATE'},
+       {name: 'datetime', type: 'DATETIME'},
        {name: 'json', type: 'RECORD', fields: [
          {name: 'key1', type: 'STRING'},
        ]},
@@ -94,6 +94,10 @@ module Embulk
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
      end
 
+     def test_datetime
+       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+     end
+
      def test_record
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
      end
@@ -138,6 +142,10 @@ module Embulk
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
      end
 
+     def test_datetime
+       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+     end
+
      def test_record
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
      end
@@ -178,6 +186,10 @@ module Embulk
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
      end
 
+     def test_datetime
+       assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+     end
+
      def test_record
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
      end
@@ -236,6 +248,20 @@ module Embulk
        assert_raise { converter.call('foo') }
      end
 
+     def test_datetime
+       converter = ValueConverterFactory.new(
+         SCHEMA_TYPE, 'DATETIME',
+         timestamp_format: '%Y/%m/%d'
+       ).create_converter
+       assert_equal nil, converter.call(nil)
+       assert_equal "2016-02-26 00:00:00.000000", converter.call("2016/02/26")
+
+       # Users must take care of the BQ datetime format themselves when no timestamp_format is given
+       converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
+       assert_equal nil, converter.call(nil)
+       assert_equal "2016-02-26 00:00:00", converter.call("2016-02-26 00:00:00")
+     end
+
      def test_record
        converter = ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter
        assert_equal({'foo'=>'foo'}, converter.call(%Q[{"foo":"foo"}]))
@@ -294,7 +320,7 @@ module Embulk
        timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
        expected = "2016-02-26"
        assert_equal expected, converter.call(timestamp)
-
+
        converter = ValueConverterFactory.new(
          SCHEMA_TYPE, 'DATE', timezone: 'Asia/Tokyo'
        ).create_converter
@@ -306,6 +332,24 @@ module Embulk
        assert_raise { converter.call('foo') }
      end
 
+     def test_datetime
+       converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
+       assert_equal nil, converter.call(nil)
+       timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
+       expected = "2016-02-26 00:00:00.500000"
+       assert_equal expected, converter.call(timestamp)
+
+       converter = ValueConverterFactory.new(
+         SCHEMA_TYPE, 'DATETIME', timezone: 'Asia/Tokyo'
+       ).create_converter
+       assert_equal nil, converter.call(nil)
+       timestamp = Time.parse("2016-02-25 15:00:00.500000 +00:00")
+       expected = "2016-02-26 00:00:00.500000"
+       assert_equal expected, converter.call(timestamp)
+
+       assert_raise { converter.call('foo') }
+     end
+
      def test_record
        assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
      end
metadata CHANGED
@@ -1,17 +1,18 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.6.3
+  version: 0.6.7
 platform: ruby
 authors:
 - Satoshi Akama
 - Naotoshi Seo
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-10-28 00:00:00.000000000 Z
+date: 2021-09-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
+  name: signet
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
@@ -20,9 +21,8 @@ dependencies:
     - - "<"
       - !ruby/object:Gem::Version
         version: 0.12.0
-  name: signet
-  prerelease: false
   type: :runtime
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
@@ -32,56 +32,90 @@ dependencies:
       - !ruby/object:Gem::Version
         version: 0.12.0
 - !ruby/object:Gem::Dependency
+  name: google-api-client
   requirement: !ruby/object:Gem::Requirement
     requirements:
    - - "<"
       - !ruby/object:Gem::Version
         version: 0.33.0
-  name: google-api-client
-  prerelease: false
   type: :runtime
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "<"
      - !ruby/object:Gem::Version
        version: 0.33.0
 - !ruby/object:Gem::Dependency
+  name: time_with_zone
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
        version: '0'
-  name: time_with_zone
-  prerelease: false
   type: :runtime
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
        version: '0'
 - !ruby/object:Gem::Dependency
+  name: representable
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '3.1'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '3.1'
+- !ruby/object:Gem::Dependency
+  name: faraday
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.12'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.12'
+- !ruby/object:Gem::Dependency
+  name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
        version: 1.10.6
-  name: bundler
-  prerelease: false
   type: :development
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
        version: 1.10.6
 - !ruby/object:Gem::Dependency
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
        version: '10.0'
-  name: rake
-  prerelease: false
   type: :development
+  prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -123,7 +157,7 @@ homepage: https://github.com/embulk/embulk-output-bigquery
 licenses:
 - MIT
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -138,9 +172,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.6.14.1
-signing_key:
+rubygems_version: 3.0.3
+signing_key:
 specification_version: 4
 summary: Google BigQuery output plugin for Embulk
 test_files: