embulk-output-bigquery 0.4.13 → 0.4.14
- checksums.yaml +5 -5
- data/.travis.yml +10 -6
- data/CHANGELOG.md +4 -0
- data/Gemfile +2 -0
- data/README.md +41 -6
- data/embulk-output-bigquery.gemspec +1 -1
- data/example/config_delete_in_advance_field_partitioned_table.yml +33 -0
- data/example/config_replace_backup_field_partitioned_table.yml +34 -0
- data/example/{config_replace_backup_paritioned_table.yml → config_replace_backup_partitioned_table.yml} +0 -0
- data/example/config_replace_field_partitioned_table.yml +33 -0
- data/example/{config_replace_paritioned_table.yml → config_replace_partitioned_table.yml} +0 -0
- data/lib/embulk/output/bigquery.rb +34 -23
- data/lib/embulk/output/bigquery/bigquery_client.rb +22 -20
- data/lib/embulk/output/bigquery/helper.rb +8 -4
- data/test/helper.rb +2 -1
- data/test/test_bigquery_client.rb +16 -16
- data/test/test_example.rb +5 -3
- data/test/test_transaction.rb +24 -24
- metadata +8 -7
- data/example/example.jsonl +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: 4fb376f288bfa86d632d727b3d0770ca4b94e364261c3f87a2569c801ee2fa00
+  data.tar.gz: 2571a07afb9aac0774e0744f9d5118712bb83f44f82470dd4fd25bf515c7b9fa
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 15f71decc69d34d8fbc3ee09452a6307107b71f759820b8a0521c6473b2231c4706febf216b59baae0e18fc3a06a056c18552d1093f0ac264ef84183a6d27992
+  data.tar.gz: 7ee57f82766927cb804bf0d88550f7f3e4d0459315160a0eec98ccd4c00e2a2423a093cffd17e836d2dba8461cbc2ae4e227ff85d60c7c9628d32b1fd142b7eb
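These digests let you verify an unpacked gem offline. A minimal sketch in Ruby, assuming the `.gem` archive has been unpacked (e.g. `tar xf embulk-output-bigquery-0.4.14.gem`) so that `checksums.yaml`, `metadata.gz`, and `data.tar.gz` sit in the current directory:

```ruby
require 'digest'
require 'yaml'

# Compare each payload's SHA256 against the value recorded in checksums.yaml.
sums = YAML.load(File.read('checksums.yaml'))
%w[metadata.gz data.tar.gz].each do |name|
  actual = Digest::SHA256.file(name).hexdigest
  puts "#{name}: #{actual == sums['SHA256'][name] ? 'OK' : 'MISMATCH'}"
end
```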
data/.travis.yml
CHANGED
@@ -1,17 +1,21 @@
 language: ruby
 matrix:
   include:
-    - env: EMBULK_VERSION=0.8.39
-      rvm: jruby-9.1.5.0 # bundled jruby version
-      jdk: openjdk7 # embulk 0.8.x uses jdk7
     - env: EMBULK_VERSION=0.9.15
-      rvm: jruby-9.1.
+      rvm: jruby-9.1.15.0 # bundled jruby version
       jdk: openjdk8 # embulk 0.9.x uses jdk8
     - env: EMBULK_VERSION=latest
-      rvm: jruby-9.1.
+      rvm: jruby-9.1.15.0 # ?
       jdk: openjdk8 # ?
   allow_failures:
     - env: EMBULK_VERSION=latest
 before_install:
   - curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-${EMBULK_VERSION}.jar"
-
+  - chmod a+x embulk.jar
+  - BUNDLER_VERSION=$(echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb | tail -n 2 | tr -d '"')
+  - gem uninstall bundler -x
+  - gem install bundler -v ${BUNDLER_VERSION}
+install:
+  - ./embulk.jar bundle install --jobs=3 --retry=3 --path vendor/bundle
+script:
+  - bundle exec env RUBYOPT="-r ./embulk.jar -r embulk -r embulk/java/bootstrap" rake test
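Note: the new `before_install` steps pin the host bundler to the exact version bundled inside embulk.jar; a mismatched bundler cannot resolve the `gem 'embulk'` dependency that lives in the jar. The `BUNDLER_VERSION=...` one-liner discovers that version by piping a tiny Ruby program into the jar's embedded irb; stripped of the `tail`/`tr` post-processing, what runs is just:

```ruby
# Evaluated inside `./embulk.jar irb`; the last expression's value is the
# bundler version shipped in the jar, which .travis.yml then installs.
require 'bundler'
Bundler::VERSION
```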
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -47,7 +47,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
 | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
 | auto_create_dataset | boolean | optional | false | automatically create dataset |
-| auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
+| auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
 | schema_file | string | optional | | /path/to/schema.json |
 | template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
 | prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
@@ -108,7 +108,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
 | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
 | time_partitioning.require_partition_filter | boolean | optional | nil | If true, valid partition filter is required when query |
-| clustering | hash | optional | nil |
+| clustering | hash | optional | nil | Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
 | clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
 | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
 
@@ -158,6 +158,8 @@ This is not transactional, i.e., if fails, the target table could have some rows
 
 ```is_skip_job_result_check``` must be false when replace mode
 
+NOTE: BigQuery does not support replacing (actually, copying into) a non-partitioned table with a partitioned table atomically. You must delete the non-partitioned table first; otherwise, you get an `Incompatible table partitioning specification when copying to the column partitioned table` error.
+
 ##### replace_backup
 
 1. Load to temporary table (Create and WRITE_APPEND in parallel)
@@ -250,9 +252,10 @@ out:
 
 ### Dynamic table creating
 
-
+This plugin tries to create a table using BigQuery API when
 
-
+* mode is either of `delete_in_advance`, `replace`, `replace_backup`, `append`.
+* mode is `append_direct` and `auto_create_table` is true.
 
 There are 3 ways to set schema.
 
@@ -370,7 +373,7 @@ out:
 
 ### GCS Bucket
 
-This is useful to reduce number of consumed jobs, which is limited by [
+This is useful to reduce number of consumed jobs, which is limited by [100,000 jobs per project per day](https://cloud.google.com/bigquery/quotas#load_jobs).
 
 This plugin originally loads local files into BigQuery in parallel, that is, consumes a number of jobs, say 24 jobs on 24 CPU core machine for example (this depends on embulk parameters such as `min_output_tasks` and `max_threads`).
 
@@ -448,8 +451,40 @@ $ embulk run -X page_size=1 -b . -l trace example/example.yml
 
 Place your embulk with `.jar` extension:
 
+
+```
+$ curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-latest.jar"
+$ chmod a+x embulk.jar
+```
+
+Investigate JRUBY\_VERSION and Bundler::VERSION included in the embulk.jar:
+
+```
+$ echo JRUBY_VERSION | ./embulk.jar irb
+2019-08-10 00:59:11.866 +0900: Embulk v0.9.17
+Switch to inspect mode.
+JRUBY_VERSION
+"X.X.X.X"
+
+$ echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb
+2019-08-10 01:59:10.460 +0900: Embulk v0.9.17
+Switch to inspect mode.
+require 'bundler'; Bundler::VERSION
+"Y.Y.Y"
+```
+
+Install the same version of jruby (change X.X.X.X to the version shown above) and bundler:
+
+```
+$ rbenv install jruby-X.X.X.X
+$ rbenv local jruby-X.X.X.X
+$ gem install bundler -v Y.Y.Y
+```
+
+Install dependencies (NOTE: Use bundler included in the embulk.jar, otherwise, `gem 'embulk'` is not found):
+
 ```
-$
+$ ./embulk.jar bundle install --path vendor/bundle
 ```
 
 Run tests with `env RUBYOPT="-r ./embulk.jar`:
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.4.13"
+  spec.version = "0.4.14"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
data/example/config_delete_in_advance_field_partitioned_table.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: delete_in_advance
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_field_partitioned_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    field: timestamp
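Note: with `time_partitioning.field` set, `auto_create_table: true` creates a column-partitioned (field-partitioned) table rather than an ingestion-time-partitioned one. A sketch of the equivalent raw API call via google-api-ruby-client, which this plugin builds on (illustrative only; project and dataset ids are placeholders):

```ruby
require 'google/apis/bigquery_v2'
require 'googleauth'

service = Google::Apis::BigqueryV2::BigqueryService.new
service.authorization = Google::Auth.get_application_default(
  ['https://www.googleapis.com/auth/bigquery']
)

table = Google::Apis::BigqueryV2::Table.new(
  table_reference: Google::Apis::BigqueryV2::TableReference.new(
    project_id: 'your-project',
    dataset_id: 'your_dataset_name',
    table_id: 'your_field_partitioned_table_name'
  ),
  # Partition on the DATE/TIMESTAMP column instead of ingestion time.
  time_partitioning: Google::Apis::BigqueryV2::TimePartitioning.new(
    type: 'DAY', field: 'timestamp'
  )
)
service.insert_table('your-project', 'your_dataset_name', table)
```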
data/example/config_replace_backup_field_partitioned_table.yml
ADDED
@@ -0,0 +1,34 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace_backup
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_field_partitioned_table_name
+  table_old: your_field_partitioned_table_name_old
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    field: 'timestamp'
data/example/{config_replace_backup_paritioned_table.yml → config_replace_backup_partitioned_table.yml}
RENAMED
File without changes
data/example/config_replace_field_partitioned_table.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_field_partitioned_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    field: 'timestamp'
data/example/{config_replace_paritioned_table.yml → config_replace_partitioned_table.yml}
RENAMED
File without changes
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -64,7 +64,7 @@ module Embulk
           'default_timestamp_format' => config.param('default_timestamp_format', :string, :default => ValueConverterFactory::DEFAULT_TIMESTAMP_FORMAT),
           'payload_column' => config.param('payload_column', :string, :default => nil),
           'payload_column_index' => config.param('payload_column_index', :integer, :default => nil),
-
+
           'open_timeout_sec' => config.param('open_timeout_sec', :integer, :default => nil),
           'timeout_sec' => config.param('timeout_sec', :integer, :default => nil), # google-api-ruby-client < v0.11.0
           'send_timeout_sec' => config.param('send_timeout_sec', :integer, :default => nil), # google-api-ruby-client >= v0.11.0
@@ -276,7 +276,7 @@ module Embulk
           sum + (response ? response.statistics.load.output_rows.to_i : 0)
         end
         if task['temp_table']
-          num_output_rows = bigquery.
+          num_output_rows = bigquery.get_table_or_partition(task['temp_table']).num_rows.to_i
         else
           num_output_rows = num_response_rows
         end
@@ -306,37 +306,48 @@ module Embulk
 
         case task['mode']
         when 'delete_in_advance'
-
-
-
-
+          bigquery.delete_partition(task['table'])
+          bigquery.create_table_if_not_exists(task['table'])
+        when 'replace'
+          bigquery.create_table_if_not_exists(task['temp_table'])
+          if Helper.has_partition_decorator?(task['table'])
+            if task['auto_create_table']
+              bigquery.create_table_if_not_exists(task['table'])
+            else
+              bigquery.get_table(task['table']) # raises NotFoundError
+            end
           end
-
-
-
-          if task['time_partitioning']
+        when 'append'
+          bigquery.create_table_if_not_exists(task['temp_table'])
+          if Helper.has_partition_decorator?(task['table'])
            if task['auto_create_table']
-              bigquery.
+              bigquery.create_table_if_not_exists(task['table'])
            else
              bigquery.get_table(task['table']) # raises NotFoundError
            end
          end
-
-
-
-
-
+        when 'replace_backup'
+          bigquery.create_table_if_not_exists(task['temp_table'])
+          if Helper.has_partition_decorator?(task['table'])
+            if task['auto_create_table']
+              bigquery.create_table_if_not_exists(task['table'])
+            else
+              bigquery.get_table(task['table']) # raises NotFoundError
+            end
          end
-
-
-          if task['mode'] == 'replace_backup'
-            if task['time_partitioning'] and Helper.has_partition_decorator?(task['table_old'])
+          if Helper.has_partition_decorator?(task['table_old'])
            if task['auto_create_table']
-              bigquery.
+              bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old'])
            else
              bigquery.get_table(task['table_old'], dataset: task['dataset_old']) # raises NotFoundError
            end
          end
+        else # append_direct
+          if task['auto_create_table']
+            bigquery.create_table_if_not_exists(task['table'])
+          else
+            bigquery.get_table(task['table']) # raises NotFoundError
+          end
         end
       end
 
@@ -403,7 +414,7 @@ module Embulk
 
         if task['mode'] == 'replace_backup'
           begin
-            bigquery.
+            bigquery.get_table_or_partition(task['table'])
             bigquery.copy(task['table'], task['table_old'], task['dataset_old'])
           rescue NotFoundError
           end
@@ -515,7 +526,7 @@ module Embulk
 
         self.class.rehearsal_thread = Thread.new do
           begin
-            bigquery.
+            bigquery.create_table_if_not_exists(task['rehearsal_table'])
             response = bigquery.load(rehearsal_path, task['rehearsal_table'])
             num_output_rows = response ? response.statistics.load.output_rows.to_i : 0
             Embulk.logger.info { "embulk-output-bigquery: Loaded rehearsal #{num_output_rows}" }
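Note: the rewritten `case` spells the pre-load behavior out per mode: `delete_in_advance` now deletes the partition and recreates the table, every copy-based mode (`replace`, `append`, `replace_backup`) creates its temp table unconditionally, and a target carrying a partition decorator is created only when `auto_create_table` is set (otherwise `get_table` fails fast with `NotFoundError`). The check repeated in each branch boils down to this (a distilled sketch, not the plugin's literal code):

```ruby
# Ensure a decorated target such as "table$20160929" exists before loading.
def ensure_target_table!(bigquery, task)
  return unless Helper.has_partition_decorator?(task['table'])
  if task['auto_create_table']
    bigquery.create_table_if_not_exists(task['table'])
  else
    bigquery.get_table(task['table']) # raises NotFoundError if missing
  end
end
```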
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -116,11 +116,11 @@ module Embulk
       if @location
         body[:job_reference][:location] = @location
       end
-
+
       if @task['schema_update_options']
         body[:configuration][:load][:schema_update_options] = @task['schema_update_options']
       end
-
+
       opts = {}
 
       Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
@@ -412,7 +412,7 @@ module Embulk
       end
     end
 
-    def
+    def create_table_if_not_exists(table, dataset: nil, options: nil)
       begin
         dataset ||= @dataset
         options ||= {}
@@ -466,8 +466,17 @@ module Embulk
     end
 
     def delete_table(table, dataset: nil)
+      table = Helper.chomp_partition_decorator(table)
+      delete_table_or_partition(table, dataset: dataset)
+    end
+
+    def delete_partition(table, dataset: nil)
+      delete_table_or_partition(table, dataset: dataset)
+    end
+
+    # if `table` with a partition decorator is given, a partition is deleted.
+    def delete_table_or_partition(table, dataset: nil)
       begin
-        table = Helper.chomp_partition_decorator(table)
         dataset ||= @dataset
         Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@project}:#{dataset}.#{table}" }
         with_network_retry { client.delete_table(@project, dataset, table) }
@@ -486,8 +495,16 @@ module Embulk
     end
 
     def get_table(table, dataset: nil)
+      table = Helper.chomp_partition_decorator(table)
+      get_table_or_partition(table)
+    end
+
+    def get_partition(table, dataset: nil)
+      get_table_or_partition(table)
+    end
+
+    def get_table_or_partition(table, dataset: nil)
       begin
-        table = Helper.chomp_partition_decorator(table)
         dataset ||= @dataset
         Embulk.logger.info { "embulk-output-bigquery: Get table... #{@project}:#{dataset}.#{table}" }
         with_network_retry { client.get_table(@project, dataset, table) }
@@ -503,21 +520,6 @@ module Embulk
         raise Error, "failed to get table #{@project}:#{dataset}.#{table}, response:#{response}"
       end
     end
-
-    # Is this only a way to drop partition?
-    def delete_partition(table_with_partition, dataset: nil)
-      dataset ||= @dataset
-      begin
-        table = Helper.chomp_partition_decorator(table_with_partition)
-        get_table(table, dataset: dataset)
-      rescue NotFoundError
-      else
-        Embulk.logger.info { "embulk-output-bigquery: Delete partition... #{@project}:#{dataset}.#{table_with_partition}" }
-        Tempfile.create('embulk_output_bigquery_empty_file_') do |fp|
-          load(fp.path, table_with_partition, write_disposition: 'WRITE_TRUNCATE')
-        end
-      end
-    end
     end
   end
 end
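Note: this removes the old workaround of truncating a partition by loading an empty file with `WRITE_TRUNCATE`. `delete_partition` now goes through `tables.delete` directly, relying on BigQuery accepting a table id with a partition decorator there, while `delete_table` chomps the decorator first so it always drops the whole table. A standalone sketch of the underlying call (assuming google-api-ruby-client with application-default credentials):

```ruby
require 'google/apis/bigquery_v2'
require 'googleauth'

service = Google::Apis::BigqueryV2::BigqueryService.new
service.authorization = Google::Auth.get_application_default(
  ['https://www.googleapis.com/auth/bigquery']
)

# With the "$20160929" decorator this removes just that partition;
# without it, the whole table is deleted.
service.delete_table('your-project', 'your_dataset', 'your_table$20160929')
```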
data/lib/embulk/output/bigquery/helper.rb
CHANGED
@@ -7,12 +7,16 @@ module Embulk
     class Helper
       PARTITION_DECORATOR_REGEXP = /\$.+\z/
 
-      def self.
-
+      def self.field_partitioning?(task)
+        (task['time_partitioning'] || {}).key?('field')
       end
 
-      def self.
-
+      def self.has_partition_decorator?(table_name)
+        !!(table_name =~ PARTITION_DECORATOR_REGEXP)
+      end
+
+      def self.chomp_partition_decorator(table_name)
+        table_name.sub(PARTITION_DECORATOR_REGEXP, '')
       end
 
       def self.bq_type_from_embulk_type(embulk_type)
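Note: the decorator helpers are pure string operations on `PARTITION_DECORATOR_REGEXP`, so their behavior is easy to pin down in isolation. A self-contained sketch mirroring the methods above:

```ruby
PARTITION_DECORATOR_REGEXP = /\$.+\z/

def has_partition_decorator?(table_name)
  !!(table_name =~ PARTITION_DECORATOR_REGEXP)
end

def chomp_partition_decorator(table_name)
  table_name.sub(PARTITION_DECORATOR_REGEXP, '')
end

p has_partition_decorator?('table_name$20160929')  # => true
p has_partition_decorator?('table_name')           # => false
p chomp_partition_decorator('table_name$20160929') # => "table_name"
```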
data/test/helper.rb
CHANGED
@@ -1,5 +1,6 @@
 #!/usr/bin/env ruby
 
+require 'bundler/setup'
 require 'test/unit'
 require 'test/unit/rr'
 
@@ -7,7 +8,7 @@ require 'embulk'
 begin
   # Embulk ~> 0.8.x
   Embulk.setup
-rescue NotImplementedError
+rescue NotImplementedError, NoMethodError, NameError
   # Embulk ~> 0.9.x
   require 'embulk/java/bootstrap'
 end
data/test/test_bigquery_client.rb
CHANGED
@@ -96,20 +96,20 @@ else
       end
     end
 
-    sub_test_case "
-      def
+    sub_test_case "create_table_if_not_exists" do
+      def test_create_table_if_not_exists
         client.delete_table('your_table_name')
-        assert_nothing_raised { client.
+        assert_nothing_raised { client.create_table_if_not_exists('your_table_name') }
       end
 
-      def
-        assert_nothing_raised { client.
+      def test_create_table_if_not_exists_already_exists
+        assert_nothing_raised { client.create_table_if_not_exists('your_table_name') }
       end
 
       def test_create_partitioned_table
         client.delete_table('your_table_name')
         assert_nothing_raised do
-          client.
+          client.create_table_if_not_exists('your_table_name$20160929', options:{
             'time_partitioning' => {'type'=>'DAY', 'expiration_ms'=>1000}
           })
         end
@@ -118,7 +118,7 @@ else
 
     sub_test_case "delete_table" do
       def test_delete_table
-        client.
+        client.create_table_if_not_exists('your_table_name')
         assert_nothing_raised { client.delete_table('your_table_name') }
       end
 
@@ -127,14 +127,14 @@ else
       end
 
       def test_delete_partitioned_table
-        client.
+        client.create_table_if_not_exists('your_table_name')
         assert_nothing_raised { client.delete_table('your_table_name$20160929') }
       end
     end
 
     sub_test_case "get_table" do
       def test_get_table
-        client.
+        client.create_table_if_not_exists('your_table_name')
         assert_nothing_raised { client.get_table('your_table_name') }
       end
 
@@ -146,7 +146,7 @@ else
       end
 
       def test_get_partitioned_table
-        client.
+        client.create_table_if_not_exists('your_table_name')
         assert_nothing_raised { client.get_table('your_table_name$20160929') }
       end
     end
@@ -154,7 +154,7 @@ else
     sub_test_case "delete_partition" do
       def test_delete_partition
         client.delete_table('your_table_name')
-        client.
+        client.create_table_if_not_exists('your_table_name$20160929')
         assert_nothing_raised { client.delete_partition('your_table_name$20160929') }
       ensure
         client.delete_table('your_table_name')
@@ -162,7 +162,7 @@ else
 
       def test_delete_partition_of_non_partitioned_table
         client.delete_table('your_table_name')
-        client.
+        client.create_table_if_not_exists('your_table_name')
         assert_raise { client.delete_partition('your_table_name$20160929') }
       ensure
         client.delete_table('your_table_name')
@@ -175,7 +175,7 @@ else
 
     sub_test_case "fields" do
       def test_fields_from_table
-        client.
+        client.create_table_if_not_exists('your_table_name')
         fields = client.fields_from_table('your_table_name')
         expected = [
           {:type=>"BOOLEAN", :name=>"boolean"},
@@ -190,15 +190,15 @@ else
     end
 
     sub_test_case "copy" do
-      def
-        client.
+      def test_create_table_if_not_exists
+        client.create_table_if_not_exists('your_table_name')
         assert_nothing_raised { client.copy('your_table_name', 'your_table_name_old') }
       end
     end
 
     sub_test_case "load" do
       def test_load
-        client.
+        client.create_table_if_not_exists('your_table_name')
         File.write("tmp/your_file_name.csv", record.to_csv)
         assert_nothing_raised { client.load("/tmp/your_file_name.csv", 'your_table_name') }
       end
data/test/test_example.rb
CHANGED
@@ -9,8 +9,10 @@ unless File.exist?(JSON_KEYFILE)
 else
   class TestExample < Test::Unit::TestCase
     def embulk_path
-      if File.exist?("#{ENV['
-        "#{ENV['
+      if File.exist?("#{ENV['HOME']}/.embulk/bin/embulk")
+        "#{ENV['HOME']}/.embulk/bin/embulk"
+      elsif File.exist?("#{ENV['PWD']}/embulk.jar")
+        "#{ENV['PWD']}/embulk.jar"
       elsif File.exist?("/usr/local/bin/embulk")
         "/usr/local/bin/embulk"
       else
@@ -19,7 +21,7 @@ else
     end
 
     def embulk_run(config_path)
-      Bundler.with_clean_env do
+      ::Bundler.with_clean_env do
         cmd = "#{embulk_path} run -X page_size=1 -b . -l trace #{config_path}"
         puts "=" * 64
         puts cmd
data/test/test_transaction.rb
CHANGED
@@ -55,7 +55,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).create_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['table'])
         end
         Bigquery.transaction(config, schema, processor_count, &control)
       end
@@ -74,7 +74,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).create_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['table'])
         end
         Bigquery.transaction(config, schema, processor_count, &control)
       end
@@ -86,19 +86,19 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
-          mock(obj).
+          mock(obj).delete_partition(config['table'])
+          mock(obj).create_table_if_not_exists(config['table'])
         end
         Bigquery.transaction(config, schema, processor_count, &control)
       end
 
       def test_delete_in_advance_with_partitioning
-        config = least_config.merge('mode' => 'delete_in_advance', 'table' => 'table$20160929')
+        config = least_config.merge('mode' => 'delete_in_advance', 'table' => 'table$20160929', 'auto_create_table' => true)
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).delete_partition(config['table'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['table'])
         end
         Bigquery.transaction(config, schema, processor_count, &control)
       end
@@ -110,7 +110,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
           mock(obj).delete_table(config['temp_table'])
         end
@@ -122,7 +122,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
           mock(obj).get_table(config['table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
           mock(obj).delete_table(config['temp_table'])
@@ -135,8 +135,8 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
           mock(obj).delete_table(config['temp_table'])
         end
@@ -151,9 +151,9 @@ module Embulk
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_dataset(config['dataset_old'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
 
-          mock(obj).
+          mock(obj).get_table_or_partition(task['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -168,9 +168,9 @@ module Embulk
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).create_dataset(config['dataset'])
           mock(obj).create_dataset(config['dataset_old'], reference: config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
 
-          mock(obj).
+          mock(obj).get_table_or_partition(task['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -185,11 +185,11 @@ module Embulk
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_dataset(config['dataset_old'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
           mock(obj).get_table(task['table'])
           mock(obj).get_table(task['table_old'], dataset: config['dataset_old'])
 
-          mock(obj).
+          mock(obj).get_table_or_partition(task['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -204,11 +204,11 @@ module Embulk
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_dataset(config['dataset_old'])
-          mock(obj).
-          mock(obj).
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(task['table'])
+          mock(obj).create_table_if_not_exists(task['table_old'], dataset: config['dataset_old'])
 
-          mock(obj).
+          mock(obj).get_table_or_partition(task['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -224,7 +224,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
           mock(obj).delete_table(config['temp_table'])
         end
@@ -236,7 +236,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
           mock(obj).get_table(config['table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
           mock(obj).delete_table(config['temp_table'])
@@ -249,8 +249,8 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
           mock(obj).delete_table(config['temp_table'])
         end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.4.13
+  version: 0.4.14
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-
+date: 2019-08-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -87,6 +87,7 @@ files:
 - example/config_client_options.yml
 - example/config_csv.yml
 - example/config_delete_in_advance.yml
+- example/config_delete_in_advance_field_partitioned_table.yml
 - example/config_delete_in_advance_partitioned_table.yml
 - example/config_expose_errors.yml
 - example/config_gcs.yml
@@ -105,8 +106,10 @@ files:
 - example/config_progress_log_interval.yml
 - example/config_replace.yml
 - example/config_replace_backup.yml
-- example/config_replace_backup_paritioned_table.yml
-- example/config_replace_paritioned_table.yml
+- example/config_replace_backup_field_partitioned_table.yml
+- example/config_replace_backup_partitioned_table.yml
+- example/config_replace_field_partitioned_table.yml
+- example/config_replace_partitioned_table.yml
 - example/config_replace_schema_update_options.yml
 - example/config_skip_file_generation.yml
 - example/config_table_strftime.yml
@@ -114,7 +117,6 @@ files:
 - example/config_uncompressed.yml
 - example/config_with_rehearsal.yml
 - example/example.csv
-- example/example.jsonl
 - example/example.yml
 - example/example2_1.csv
 - example/example2_2.csv
@@ -160,8 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-
-rubygems_version: 2.6.14.1
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Google BigQuery output plugin for Embulk
data/example/example.jsonl
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":90,"string":"l6lTsvxd","double":903.4,"boolean":true}
|
2
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":91,"string":"XoALSEQg","double":394.5,"boolean":true}
|
3
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":92,"string":"0hgDRI_m","double":810.9,"boolean":true}
|
4
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":93,"string":"KjCRAc-A","double":477.4,"boolean":true}
|
5
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":94,"string":"fyQVGlT8","double":725.3,"boolean":true}
|
6
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":95,"string":"FpBYRPWK","double":316.6,"boolean":true}
|
7
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":96,"string":"9ikvnUqp","double":369.5,"boolean":true}
|
8
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":97,"string":"RRNYDAzK","double":506.5,"boolean":true}
|
9
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":90,"string":"l6lTsvxd","double":903.4,"boolean":false}
|
10
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":91,"string":"XoALSEQg","double":394.5,"boolean":false}
|
11
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":92,"string":"0hgDRI_m","double":810.9,"boolean":false}
|
12
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":93,"string":"KjCRAc-A","double":477.4,"boolean":false}
|
13
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":94,"string":"fyQVGlT8","double":725.3,"boolean":false}
|
14
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":95,"string":"FpBYRPWK","double":316.6,"boolean":false}
|
15
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":96,"string":"9ikvnUqp","double":369.5,"boolean":false}
|
16
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":97,"string":"RRNYDAzK","double":506.5,"boolean":false}
|