embulk-output-bigquery 0.4.13 → 0.4.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +10 -6
- data/CHANGELOG.md +4 -0
- data/Gemfile +2 -0
- data/README.md +41 -6
- data/embulk-output-bigquery.gemspec +1 -1
- data/example/config_delete_in_advance_field_partitioned_table.yml +33 -0
- data/example/config_replace_backup_field_partitioned_table.yml +34 -0
- data/example/{config_replace_backup_paritioned_table.yml → config_replace_backup_partitioned_table.yml} +0 -0
- data/example/config_replace_field_partitioned_table.yml +33 -0
- data/example/{config_replace_paritioned_table.yml → config_replace_partitioned_table.yml} +0 -0
- data/lib/embulk/output/bigquery.rb +34 -23
- data/lib/embulk/output/bigquery/bigquery_client.rb +22 -20
- data/lib/embulk/output/bigquery/helper.rb +8 -4
- data/test/helper.rb +2 -1
- data/test/test_bigquery_client.rb +16 -16
- data/test/test_example.rb +5 -3
- data/test/test_transaction.rb +24 -24
- metadata +8 -7
- data/example/example.jsonl +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-
-metadata.gz:
-data.tar.gz:
+SHA256:
+  metadata.gz: 4fb376f288bfa86d632d727b3d0770ca4b94e364261c3f87a2569c801ee2fa00
+  data.tar.gz: 2571a07afb9aac0774e0744f9d5118712bb83f44f82470dd4fd25bf515c7b9fa
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 15f71decc69d34d8fbc3ee09452a6307107b71f759820b8a0521c6473b2231c4706febf216b59baae0e18fc3a06a056c18552d1093f0ac264ef84183a6d27992
+  data.tar.gz: 7ee57f82766927cb804bf0d88550f7f3e4d0459315160a0eec98ccd4c00e2a2423a093cffd17e836d2dba8461cbc2ae4e227ff85d60c7c9628d32b1fd142b7eb
data/.travis.yml
CHANGED
@@ -1,17 +1,21 @@
 language: ruby
 matrix:
   include:
-    - env: EMBULK_VERSION=0.8.39
-      rvm: jruby-9.1.5.0 # bundled jruby version
-      jdk: openjdk7 # embulk 0.8.x uses jdk7
     - env: EMBULK_VERSION=0.9.15
-      rvm: jruby-9.1.
+      rvm: jruby-9.1.15.0 # bundled jruby version
       jdk: openjdk8 # embulk 0.9.x uses jdk8
     - env: EMBULK_VERSION=latest
-      rvm: jruby-9.1.
+      rvm: jruby-9.1.15.0 # ?
       jdk: openjdk8 # ?
   allow_failures:
     - env: EMBULK_VERSION=latest
 before_install:
   - curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-${EMBULK_VERSION}.jar"
-
+  - chmod a+x embulk.jar
+  - BUNDLER_VERSION=$(echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb | tail -n 2 | tr -d '"')
+  - gem uninstall bundler -x
+  - gem install bundler -v ${BUNDLER_VERSION}
+install:
+  - ./embulk.jar bundle install --jobs=3 --retry=3 --path vendor/bundle
+script:
+  - bundle exec env RUBYOPT="-r ./embulk.jar -r embulk -r embulk/java/bootstrap" rake test
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -47,7 +47,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
 | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
 | auto_create_dataset | boolean | optional | false | automatically create dataset |
-| auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
+| auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
 | schema_file | string | optional | | /path/to/schema.json |
 | template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
 | prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
@@ -108,7 +108,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
 | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
 | time_partitioning.require_partition_filter | boolean | optional | nil | If true, valid partition filter is required when query |
-| clustering | hash | optional | nil |
+| clustering | hash | optional | nil | Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
 | clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
 | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
 
@@ -158,6 +158,8 @@ This is not transactional, i.e., if fails, the target table could have some rows
 
 ```is_skip_job_result_check``` must be false when replace mode
 
+NOTE: BigQuery does not support replacing (actually, copying into) a non-partitioned table with a paritioned table atomically. You must once delete the non-partitioned table, otherwise, you get `Incompatible table partitioning specification when copying to the column partitioned table` error.
+
 ##### replace_backup
 
 1. Load to temporary table (Create and WRITE_APPEND in parallel)
@@ -250,9 +252,10 @@ out:
 
 ### Dynamic table creating
 
-
+This plugin tries to create a table using BigQuery API when
 
-
+* mode is either of `delete_in_advance`, `replace`, `replace_backup`, `append`.
+* mode is `append_direct` and `auto_create_table` is true.
 
 There are 3 ways to set schema.
 
@@ -370,7 +373,7 @@ out:
 
 ### GCS Bucket
 
-This is useful to reduce number of consumed jobs, which is limited by [
+This is useful to reduce number of consumed jobs, which is limited by [100,000 jobs per project per day](https://cloud.google.com/bigquery/quotas#load_jobs).
 
 This plugin originally loads local files into BigQuery in parallel, that is, consumes a number of jobs, say 24 jobs on 24 CPU core machine for example (this depends on embulk parameters such as `min_output_tasks` and `max_threads`).
 
@@ -448,8 +451,40 @@ $ embulk run -X page_size=1 -b . -l trace example/example.yml
 
 Place your embulk with `.jar` extension:
 
+
+```
+$ curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-latest.jar"
+$ chmod a+x embulk.jar
+```
+
+Investigate JRUBY\_VERSION and Bundler::VERSION included in the embulk.jar:
+
+```
+$ echo JRUBY_VERSION | ./embulk.jar irb
+2019-08-10 00:59:11.866 +0900: Embulk v0.9.17
+Switch to inspect mode.
+JRUBY_VERSION
+"X.X.X.X"
+
+$ echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb
+2019-08-10 01:59:10.460 +0900: Embulk v0.9.17
+Switch to inspect mode.
+require 'bundler'; Bundler::VERSION
+"Y.Y.Y"
+```
+
+Install the same version of jruby (change X.X.X.X to the version shown above) and bundler:
+
+```
+$ rbenv install jruby-X.X.X.X
+$ rbenv local jruby-X.X.X.X
+$ gem install bundler -v Y.Y.Y
+```
+
+Install dependencies (NOTE: Use bundler included in the embulk.jar, otherwise, `gem 'embulk'` is not found):
+
 ```
-$
+$ ./embulk.jar bundle install --path vendor/bundle
 ```
 
 Run tests with `env RUBYOPT="-r ./embulk.jar`:
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.4.13"
+  spec.version = "0.4.14"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
data/example/config_delete_in_advance_field_partitioned_table.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: delete_in_advance
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_field_partitioned_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    field: timestamp
data/example/config_replace_backup_field_partitioned_table.yml
ADDED
@@ -0,0 +1,34 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace_backup
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_field_partitioned_table_name
+  table_old: your_field_partitioned_table_name_old
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    field: 'timestamp'
data/example/{config_replace_backup_paritioned_table.yml → config_replace_backup_partitioned_table.yml}
RENAMED
File without changes
data/example/config_replace_field_partitioned_table.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_field_partitioned_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    field: 'timestamp'
data/example/{config_replace_paritioned_table.yml → config_replace_partitioned_table.yml}
RENAMED
File without changes
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -64,7 +64,7 @@ module Embulk
         'default_timestamp_format' => config.param('default_timestamp_format', :string, :default => ValueConverterFactory::DEFAULT_TIMESTAMP_FORMAT),
         'payload_column' => config.param('payload_column', :string, :default => nil),
         'payload_column_index' => config.param('payload_column_index', :integer, :default => nil),
-
+
         'open_timeout_sec' => config.param('open_timeout_sec', :integer, :default => nil),
         'timeout_sec' => config.param('timeout_sec', :integer, :default => nil), # google-api-ruby-client < v0.11.0
         'send_timeout_sec' => config.param('send_timeout_sec', :integer, :default => nil), # google-api-ruby-client >= v0.11.0
@@ -276,7 +276,7 @@ module Embulk
         sum + (response ? response.statistics.load.output_rows.to_i : 0)
       end
       if task['temp_table']
-        num_output_rows = bigquery.
+        num_output_rows = bigquery.get_table_or_partition(task['temp_table']).num_rows.to_i
       else
         num_output_rows = num_response_rows
       end
@@ -306,37 +306,48 @@ module Embulk
 
       case task['mode']
       when 'delete_in_advance'
-
-
-
-
+        bigquery.delete_partition(task['table'])
+        bigquery.create_table_if_not_exists(task['table'])
+      when 'replace'
+        bigquery.create_table_if_not_exists(task['temp_table'])
+        if Helper.has_partition_decorator?(task['table'])
+          if task['auto_create_table']
+            bigquery.create_table_if_not_exists(task['table'])
+          else
+            bigquery.get_table(task['table']) # raises NotFoundError
+          end
         end
-
-
-
-        if task['time_partitioning']
+      when 'append'
+        bigquery.create_table_if_not_exists(task['temp_table'])
+        if Helper.has_partition_decorator?(task['table'])
           if task['auto_create_table']
-            bigquery.
+            bigquery.create_table_if_not_exists(task['table'])
           else
            bigquery.get_table(task['table']) # raises NotFoundError
          end
        end
-
-
-
-
-
+      when 'replace_backup'
+        bigquery.create_table_if_not_exists(task['temp_table'])
+        if Helper.has_partition_decorator?(task['table'])
+          if task['auto_create_table']
+            bigquery.create_table_if_not_exists(task['table'])
+          else
+            bigquery.get_table(task['table']) # raises NotFoundError
+          end
        end
-
-
-        if task['mode'] == 'replace_backup'
-          if task['time_partitioning'] and Helper.has_partition_decorator?(task['table_old'])
+        if Helper.has_partition_decorator?(task['table_old'])
          if task['auto_create_table']
-            bigquery.
+            bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old'])
          else
            bigquery.get_table(task['table_old'], dataset: task['dataset_old']) # raises NotFoundError
          end
        end
+      else # append_direct
+        if task['auto_create_table']
+          bigquery.create_table_if_not_exists(task['table'])
+        else
+          bigquery.get_table(task['table']) # raises NotFoundError
+        end
       end
     end
 
@@ -403,7 +414,7 @@ module Embulk
 
       if task['mode'] == 'replace_backup'
         begin
-          bigquery.
+          bigquery.get_table_or_partition(task['table'])
           bigquery.copy(task['table'], task['table_old'], task['dataset_old'])
         rescue NotFoundError
         end
@@ -515,7 +526,7 @@ module Embulk
 
       self.class.rehearsal_thread = Thread.new do
         begin
-          bigquery.
+          bigquery.create_table_if_not_exists(task['rehearsal_table'])
           response = bigquery.load(rehearsal_path, task['rehearsal_table'])
           num_output_rows = response ? response.statistics.load.output_rows.to_i : 0
           Embulk.logger.info { "embulk-output-bigquery: Loaded rehearsal #{num_output_rows}" }
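For orientation, the table-preparation rule that `replace`, `append`, and `replace_backup` now share can be condensed as below. This is a sketch, not verbatim plugin code; the helper name `ensure_target_table` is invented here for illustration.

```ruby
# Sketch: the shared rule the transaction refactor converges on.
# When the target is written through a partition decorator (e.g. "table$20160929"),
# the base table must exist before the final copy, so the plugin either
# creates it up front or fails fast with NotFoundError.
def ensure_target_table(bigquery, task)
  return unless Helper.has_partition_decorator?(task['table'])
  if task['auto_create_table']
    bigquery.create_table_if_not_exists(task['table'])
  else
    bigquery.get_table(task['table']) # raises NotFoundError
  end
end
```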
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -116,11 +116,11 @@ module Embulk
         if @location
           body[:job_reference][:location] = @location
         end
-
+
         if @task['schema_update_options']
           body[:configuration][:load][:schema_update_options] = @task['schema_update_options']
         end
-
+
         opts = {}
 
         Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
@@ -412,7 +412,7 @@ module Embulk
         end
       end
 
-      def 
+      def create_table_if_not_exists(table, dataset: nil, options: nil)
         begin
           dataset ||= @dataset
           options ||= {}
@@ -466,8 +466,17 @@ module Embulk
       end
 
       def delete_table(table, dataset: nil)
+        table = Helper.chomp_partition_decorator(table)
+        delete_table_or_partition(table, dataset: dataset)
+      end
+
+      def delete_partition(table, dataset: nil)
+        delete_table_or_partition(table, dataset: dataset)
+      end
+
+      # if `table` with a partition decorator is given, a partition is deleted.
+      def delete_table_or_partition(table, dataset: nil)
         begin
-          table = Helper.chomp_partition_decorator(table)
           dataset ||= @dataset
           Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@project}:#{dataset}.#{table}" }
           with_network_retry { client.delete_table(@project, dataset, table) }
@@ -486,8 +495,16 @@ module Embulk
       end
 
       def get_table(table, dataset: nil)
+        table = Helper.chomp_partition_decorator(table)
+        get_table_or_partition(table)
+      end
+
+      def get_partition(table, dataset: nil)
+        get_table_or_partition(table)
+      end
+
+      def get_table_or_partition(table, dataset: nil)
         begin
-          table = Helper.chomp_partition_decorator(table)
           dataset ||= @dataset
           Embulk.logger.info { "embulk-output-bigquery: Get table... #{@project}:#{dataset}.#{table}" }
           with_network_retry { client.get_table(@project, dataset, table) }
@@ -503,21 +520,6 @@ module Embulk
           raise Error, "failed to get table #{@project}:#{dataset}.#{table}, response:#{response}"
         end
       end
-
-      # Is this only a way to drop partition?
-      def delete_partition(table_with_partition, dataset: nil)
-        dataset ||= @dataset
-        begin
-          table = Helper.chomp_partition_decorator(table_with_partition)
-          get_table(table, dataset: dataset)
-        rescue NotFoundError
-        else
-          Embulk.logger.info { "embulk-output-bigquery: Delete partition... #{@project}:#{dataset}.#{table_with_partition}" }
-          Tempfile.create('embulk_output_bigquery_empty_file_') do |fp|
-            load(fp.path, table_with_partition, write_disposition: 'WRITE_TRUNCATE')
-          end
-        end
-      end
     end
   end
 end
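The `delete_table`/`get_table` versus `delete_partition`/`get_partition` split above hinges entirely on whether the partition decorator is chomped first. A minimal usage sketch, assuming an already-constructed `BigqueryClient` instance named `client` (setup omitted):

```ruby
# All four methods funnel into *_table_or_partition; the only difference is
# whether Helper.chomp_partition_decorator strips the "$YYYYMMDD" suffix first.
client.delete_table('your_table_name$20160929')     # decorator chomped: drops the whole table
client.delete_partition('your_table_name$20160929') # decorator kept: drops only that partition
client.get_table('your_table_name$20160929')        # decorator chomped: fetches the base table
client.get_partition('your_table_name$20160929')    # decorator kept: fetches via the decorated name
```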
data/lib/embulk/output/bigquery/helper.rb
CHANGED
@@ -7,12 +7,16 @@ module Embulk
     class Helper
       PARTITION_DECORATOR_REGEXP = /\$.+\z/
 
-      def self.
-
+      def self.field_partitioning?(task)
+        (task['time_partitioning'] || {}).key?('field')
       end
 
-      def self.
-
+      def self.has_partition_decorator?(table_name)
+        !!(table_name =~ PARTITION_DECORATOR_REGEXP)
+      end
+
+      def self.chomp_partition_decorator(table_name)
+        table_name.sub(PARTITION_DECORATOR_REGEXP, '')
       end
 
       def self.bq_type_from_embulk_type(embulk_type)
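The three `Helper` methods above are small enough to check by hand. A self-contained sketch of their behavior, lifted out of the class so it can be pasted straight into irb:

```ruby
# Same regexp and logic as Helper in this diff, at top level for easy testing.
PARTITION_DECORATOR_REGEXP = /\$.+\z/

def field_partitioning?(task)
  (task['time_partitioning'] || {}).key?('field')
end

def has_partition_decorator?(table_name)
  !!(table_name =~ PARTITION_DECORATOR_REGEXP)
end

def chomp_partition_decorator(table_name)
  table_name.sub(PARTITION_DECORATOR_REGEXP, '')
end

has_partition_decorator?('table$20160929')  #=> true
has_partition_decorator?('table')           #=> false
chomp_partition_decorator('table$20160929') #=> "table"
field_partitioning?('time_partitioning' => {'field' => 'timestamp'}) #=> true
field_partitioning?({})                                              #=> false
```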
data/test/helper.rb
CHANGED
@@ -1,5 +1,6 @@
 #!/usr/bin/env ruby
 
+require 'bundler/setup'
 require 'test/unit'
 require 'test/unit/rr'
 
@@ -7,7 +8,7 @@ require 'embulk'
 begin
   # Embulk ~> 0.8.x
   Embulk.setup
-rescue NotImplementedError
+rescue NotImplementedError, NoMethodError, NameError
   # Embulk ~> 0.9.x
   require 'embulk/java/bootstrap'
 end
data/test/test_bigquery_client.rb
CHANGED
@@ -96,20 +96,20 @@ else
     end
   end
 
-  sub_test_case "
-    def 
+  sub_test_case "create_table_if_not_exists" do
+    def test_create_table_if_not_exists
       client.delete_table('your_table_name')
-      assert_nothing_raised { client.
+      assert_nothing_raised { client.create_table_if_not_exists('your_table_name') }
     end
 
-    def 
-      assert_nothing_raised { client.
+    def test_create_table_if_not_exists_already_exists
+      assert_nothing_raised { client.create_table_if_not_exists('your_table_name') }
     end
 
     def test_create_partitioned_table
       client.delete_table('your_table_name')
       assert_nothing_raised do
-        client.
+        client.create_table_if_not_exists('your_table_name$20160929', options:{
           'time_partitioning' => {'type'=>'DAY', 'expiration_ms'=>1000}
         })
       end
@@ -118,7 +118,7 @@ else
 
   sub_test_case "delete_table" do
     def test_delete_table
-      client.
+      client.create_table_if_not_exists('your_table_name')
       assert_nothing_raised { client.delete_table('your_table_name') }
     end
 
@@ -127,14 +127,14 @@ else
    end
 
    def test_delete_partitioned_table
-      client.
+      client.create_table_if_not_exists('your_table_name')
      assert_nothing_raised { client.delete_table('your_table_name$20160929') }
    end
  end
 
  sub_test_case "get_table" do
    def test_get_table
-      client.
+      client.create_table_if_not_exists('your_table_name')
      assert_nothing_raised { client.get_table('your_table_name') }
    end
 
@@ -146,7 +146,7 @@ else
    end
 
    def test_get_partitioned_table
-      client.
+      client.create_table_if_not_exists('your_table_name')
      assert_nothing_raised { client.get_table('your_table_name$20160929') }
    end
  end
@@ -154,7 +154,7 @@ else
  sub_test_case "delete_partition" do
    def test_delete_partition
      client.delete_table('your_table_name')
-      client.
+      client.create_table_if_not_exists('your_table_name$20160929')
      assert_nothing_raised { client.delete_partition('your_table_name$20160929') }
    ensure
      client.delete_table('your_table_name')
@@ -162,7 +162,7 @@ else
 
    def test_delete_partition_of_non_partitioned_table
      client.delete_table('your_table_name')
-      client.
+      client.create_table_if_not_exists('your_table_name')
      assert_raise { client.delete_partition('your_table_name$20160929') }
    ensure
      client.delete_table('your_table_name')
@@ -175,7 +175,7 @@ else
 
  sub_test_case "fields" do
    def test_fields_from_table
-      client.
+      client.create_table_if_not_exists('your_table_name')
      fields = client.fields_from_table('your_table_name')
      expected = [
        {:type=>"BOOLEAN", :name=>"boolean"},
@@ -190,15 +190,15 @@ else
    end
 
  sub_test_case "copy" do
-    def 
-      client.
+    def test_create_table_if_not_exists
+      client.create_table_if_not_exists('your_table_name')
      assert_nothing_raised { client.copy('your_table_name', 'your_table_name_old') }
    end
  end
 
  sub_test_case "load" do
    def test_load
-      client.
+      client.create_table_if_not_exists('your_table_name')
      File.write("tmp/your_file_name.csv", record.to_csv)
      assert_nothing_raised { client.load("/tmp/your_file_name.csv", 'your_table_name') }
    end
data/test/test_example.rb
CHANGED
@@ -9,8 +9,10 @@ unless File.exist?(JSON_KEYFILE)
 else
   class TestExample < Test::Unit::TestCase
     def embulk_path
-      if File.exist?("#{ENV['
-        "#{ENV['
+      if File.exist?("#{ENV['HOME']}/.embulk/bin/embulk")
+        "#{ENV['HOME']}/.embulk/bin/embulk"
+      elsif File.exist?("#{ENV['PWD']}/embulk.jar")
+        "#{ENV['PWD']}/embulk.jar"
       elsif File.exist?("/usr/local/bin/embulk")
         "/usr/local/bin/embulk"
       else
@@ -19,7 +21,7 @@ else
     end
 
     def embulk_run(config_path)
-      Bundler.with_clean_env do
+      ::Bundler.with_clean_env do
         cmd = "#{embulk_path} run -X page_size=1 -b . -l trace #{config_path}"
         puts "=" * 64
         puts cmd
data/test/test_transaction.rb
CHANGED
@@ -55,7 +55,7 @@ module Embulk
       task = Bigquery.configure(config, schema, processor_count)
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).create_dataset(config['dataset'])
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['table'])
       end
       Bigquery.transaction(config, schema, processor_count, &control)
     end
@@ -74,7 +74,7 @@ module Embulk
       task = Bigquery.configure(config, schema, processor_count)
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).create_dataset(config['dataset'])
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['table'])
       end
       Bigquery.transaction(config, schema, processor_count, &control)
     end
@@ -86,19 +86,19 @@ module Embulk
       task = Bigquery.configure(config, schema, processor_count)
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
-        mock(obj).
-        mock(obj).
+        mock(obj).delete_partition(config['table'])
+        mock(obj).create_table_if_not_exists(config['table'])
       end
       Bigquery.transaction(config, schema, processor_count, &control)
     end
 
     def test_delete_in_advance_with_partitioning
-      config = least_config.merge('mode' => 'delete_in_advance', 'table' => 'table$20160929')
+      config = least_config.merge('mode' => 'delete_in_advance', 'table' => 'table$20160929', 'auto_create_table' => true)
       task = Bigquery.configure(config, schema, processor_count)
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
         mock(obj).delete_partition(config['table'])
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['table'])
       end
       Bigquery.transaction(config, schema, processor_count, &control)
     end
@@ -110,7 +110,7 @@ module Embulk
       task = Bigquery.configure(config, schema, processor_count)
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['temp_table'])
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
         mock(obj).delete_table(config['temp_table'])
       end
@@ -122,7 +122,7 @@ module Embulk
       task = Bigquery.configure(config, schema, processor_count)
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['temp_table'])
         mock(obj).get_table(config['table'])
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
         mock(obj).delete_table(config['temp_table'])
@@ -135,8 +135,8 @@ module Embulk
       task = Bigquery.configure(config, schema, processor_count)
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
-        mock(obj).
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['temp_table'])
+        mock(obj).create_table_if_not_exists(config['table'])
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
         mock(obj).delete_table(config['temp_table'])
       end
@@ -151,9 +151,9 @@ module Embulk
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
         mock(obj).get_dataset(config['dataset_old'])
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['temp_table'])
 
-        mock(obj).
+        mock(obj).get_table_or_partition(task['table'])
         mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -168,9 +168,9 @@ module Embulk
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).create_dataset(config['dataset'])
         mock(obj).create_dataset(config['dataset_old'], reference: config['dataset'])
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['temp_table'])
 
-        mock(obj).
+        mock(obj).get_table_or_partition(task['table'])
         mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -185,11 +185,11 @@ module Embulk
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
         mock(obj).get_dataset(config['dataset_old'])
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['temp_table'])
         mock(obj).get_table(task['table'])
         mock(obj).get_table(task['table_old'], dataset: config['dataset_old'])
 
-        mock(obj).
+        mock(obj).get_table_or_partition(task['table'])
         mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -204,11 +204,11 @@ module Embulk
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
         mock(obj).get_dataset(config['dataset_old'])
-        mock(obj).
-        mock(obj).
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['temp_table'])
+        mock(obj).create_table_if_not_exists(task['table'])
+        mock(obj).create_table_if_not_exists(task['table_old'], dataset: config['dataset_old'])
 
-        mock(obj).
+        mock(obj).get_table_or_partition(task['table'])
         mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -224,7 +224,7 @@ module Embulk
       task = Bigquery.configure(config, schema, processor_count)
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['temp_table'])
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
         mock(obj).delete_table(config['temp_table'])
       end
@@ -236,7 +236,7 @@ module Embulk
       task = Bigquery.configure(config, schema, processor_count)
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['temp_table'])
         mock(obj).get_table(config['table'])
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
         mock(obj).delete_table(config['temp_table'])
@@ -249,8 +249,8 @@ module Embulk
       task = Bigquery.configure(config, schema, processor_count)
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
-        mock(obj).
-        mock(obj).
+        mock(obj).create_table_if_not_exists(config['temp_table'])
+        mock(obj).create_table_if_not_exists(config['table'])
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
         mock(obj).delete_table(config['temp_table'])
       end
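Taken together, the transaction tests above pin down which client calls each mode makes. A summary sketch, distilled from the mocks rather than taken from the gem's code:

```ruby
# delete_in_advance: delete_partition(table) -> create_table_if_not_exists(table)
# replace:           create_table_if_not_exists(temp_table)
#                    -> copy(temp_table, table, WRITE_TRUNCATE) -> delete_table(temp_table)
# replace_backup:    as replace, plus get_table_or_partition(table)
#                    -> copy(table, table_old, dataset_old) before the final copy
# append:            create_table_if_not_exists(temp_table)
#                    -> copy(temp_table, table, WRITE_APPEND) -> delete_table(temp_table)
# append_direct:     create_table_if_not_exists(table) when auto_create_table,
#                    else get_table(table); no temp table involved
```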
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.4.13
+  version: 0.4.14
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-
+date: 2019-08-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -87,6 +87,7 @@ files:
 - example/config_client_options.yml
 - example/config_csv.yml
 - example/config_delete_in_advance.yml
+- example/config_delete_in_advance_field_partitioned_table.yml
 - example/config_delete_in_advance_partitioned_table.yml
 - example/config_expose_errors.yml
 - example/config_gcs.yml
@@ -105,8 +106,10 @@ files:
 - example/config_progress_log_interval.yml
 - example/config_replace.yml
 - example/config_replace_backup.yml
-- example/config_replace_backup_paritioned_table.yml
-- example/config_replace_paritioned_table.yml
+- example/config_replace_backup_field_partitioned_table.yml
+- example/config_replace_backup_partitioned_table.yml
+- example/config_replace_field_partitioned_table.yml
+- example/config_replace_partitioned_table.yml
 - example/config_replace_schema_update_options.yml
 - example/config_skip_file_generation.yml
 - example/config_table_strftime.yml
@@ -114,7 +117,6 @@ files:
 - example/config_uncompressed.yml
 - example/config_with_rehearsal.yml
 - example/example.csv
-- example/example.jsonl
 - example/example.yml
 - example/example2_1.csv
 - example/example2_2.csv
@@ -160,8 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-
-rubygems_version: 2.6.14.1
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Google BigQuery output plugin for Embulk
data/example/example.jsonl
DELETED
@@ -1,16 +0,0 @@
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":90,"string":"l6lTsvxd","double":903.4,"boolean":true}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":91,"string":"XoALSEQg","double":394.5,"boolean":true}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":92,"string":"0hgDRI_m","double":810.9,"boolean":true}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":93,"string":"KjCRAc-A","double":477.4,"boolean":true}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":94,"string":"fyQVGlT8","double":725.3,"boolean":true}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":95,"string":"FpBYRPWK","double":316.6,"boolean":true}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":96,"string":"9ikvnUqp","double":369.5,"boolean":true}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":97,"string":"RRNYDAzK","double":506.5,"boolean":true}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":90,"string":"l6lTsvxd","double":903.4,"boolean":false}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":91,"string":"XoALSEQg","double":394.5,"boolean":false}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":92,"string":"0hgDRI_m","double":810.9,"boolean":false}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":93,"string":"KjCRAc-A","double":477.4,"boolean":false}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":94,"string":"fyQVGlT8","double":725.3,"boolean":false}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":95,"string":"FpBYRPWK","double":316.6,"boolean":false}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":96,"string":"9ikvnUqp","double":369.5,"boolean":false}
-{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":97,"string":"RRNYDAzK","double":506.5,"boolean":false}