embulk-output-bigquery 0.4.10 → 0.4.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.travis.yml +14 -9
- data/CHANGELOG.md +4 -0
- data/README.md +17 -9
- data/embulk-output-bigquery.gemspec +1 -2
- data/lib/embulk/output/bigquery/bigquery_client.rb +9 -2
- data/test/helper.rb +7 -2
- metadata +15 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 17069f10356213861511a440c26f35b1e2bedc2c
|
4
|
+
data.tar.gz: 931cc6a396ca4497c04edf9248d40d4a4a5d5669
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 216f4ceefdd3e3a6fc3ca8e58f93d32facf66af350b861e065695a4d2acb0ffa888a9705f40ba2a3570fadae91af1e6601b6a191c4e7a5a41144bd08528b7a9f
|
7
|
+
data.tar.gz: ba57e3df7be7a4b8cc320dd3a61ab3db77aece068132ec0238b94da3dd417b2df9e4b2e3f754f75036b628ce583c75935b8881a49d297347312314b4afca0073
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,12 +1,17 @@
|
|
1
1
|
language: ruby
|
2
|
-
cache: bundler
|
3
|
-
rvm:
|
4
|
-
- jruby-9.0.5.0
|
5
|
-
- jruby-head
|
6
|
-
jdk:
|
7
|
-
- openjdk7
|
8
|
-
before_install:
|
9
|
-
- gem install bundler
|
10
2
|
matrix:
|
3
|
+
include:
|
4
|
+
- env: EMBULK_VERSION=0.8.39
|
5
|
+
rvm: jruby-9.1.5.0 # bundled jruby version
|
6
|
+
jdk: openjdk7 # embulk 0.8.x uses jdk7
|
7
|
+
- env: EMBULK_VERSION=0.9.15
|
8
|
+
rvm: jruby-9.1.5.0 # bundled jruby version
|
9
|
+
jdk: openjdk8 # embulk 0.9.x uses jdk8
|
10
|
+
- env: EMBULK_VERSION=latest
|
11
|
+
rvm: jruby-9.1.5.0 # ?
|
12
|
+
jdk: openjdk8 # ?
|
11
13
|
allow_failures:
|
12
|
-
-
|
14
|
+
- env: EMBULK_VERSION=latest
|
15
|
+
before_install:
|
16
|
+
- curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-${EMBULK_VERSION}.jar"
|
17
|
+
script: bundle exec env RUBYOPT="-r ./embulk.jar" rake test
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 0.4.11 - 2019-03-07
|
2
|
+
|
3
|
+
* [maintenance] Fix to use `response.status.error_result` instead of `response.status.errors` to check job failure status (thanks to @nownabe)
|
4
|
+
|
1
5
|
## 0.4.10 - 2018-11-08
|
2
6
|
* [enhancement] Support column-based partition (thanks to Chi-Ruei Li)
|
3
7
|
|
data/README.md
CHANGED
@@ -14,7 +14,7 @@ https://developers.google.com/bigquery/loading-data-into-bigquery
|
|
14
14
|
* **Cleanup supported**: no
|
15
15
|
* **Dynamic table creating**: yes
|
16
16
|
|
17
|
-
### NOT IMPLEMENTED
|
17
|
+
### NOT IMPLEMENTED
|
18
18
|
* insert data over streaming inserts
|
19
19
|
* for continuous real-time insertions
|
20
20
|
* Please use other product, like [fluent-plugin-bigquery](https://github.com/kaizenplatform/fluent-plugin-bigquery)
|
@@ -35,7 +35,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
|
|
35
35
|
|
36
36
|
#### Original options
|
37
37
|
|
38
|
-
| name | type | required? | default | description |
|
38
|
+
| name | type | required? | default | description |
|
39
39
|
|:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
|
40
40
|
| mode | string | optional | "append" | See [Mode](#mode) |
|
41
41
|
| auth_method | string | optional | "private_key" | `private_key` , `json_key` or `compute_engine`
|
@@ -53,7 +53,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
|
|
53
53
|
| prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
|
54
54
|
| job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
|
55
55
|
| job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
|
56
|
-
| is_skip_job_result_check | boolean | optional | false | Skip waiting Load job finishes. Available for append, or delete_in_advance mode |
|
56
|
+
| is_skip_job_result_check | boolean | optional | false | Skip waiting Load job finishes. Available for append, or delete_in_advance mode |
|
57
57
|
| with_rehearsal | boolean | optional | false | Load `rehearsal_counts` records as a rehearsal. Rehearsal loads into REHEARSAL temporary table, and delete finally. You may use this option to investigate data errors as early stage as possible |
|
58
58
|
| rehearsal_counts | integer | optional | 1000 | Specify number of records to load in a rehearsal |
|
59
59
|
| abort_on_error | boolean | optional | true if max_bad_records is 0, otherwise false | Raise an error if number of input rows and number of output rows does not match |
|
@@ -80,7 +80,7 @@ Client or request options
|
|
80
80
|
|
81
81
|
Options for intermediate local files
|
82
82
|
|
83
|
-
| name | type | required? | default | description |
|
83
|
+
| name | type | required? | default | description |
|
84
84
|
|:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
|
85
85
|
| path_prefix | string | optional | | Path prefix of local files such as "/tmp/prefix_". Default randomly generates with [tempfile](http://ruby-doc.org/stdlib-2.2.3/libdoc/tempfile/rdoc/Tempfile.html) |
|
86
86
|
| sequence_format | string | optional | .%d.%d | Sequence format for pid, thread id |
|
@@ -107,7 +107,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
|
|
107
107
|
| time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
|
108
108
|
| time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
|
109
109
|
| time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
|
110
|
-
| time_partitioning.requirePartitionFilter | boolean | optional | nil | If
|
110
|
+
| time_partitioning.requirePartitionFilter | boolean | optional | nil | If true, valid partition filter is required when query |
|
111
111
|
| schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
|
112
112
|
|
113
113
|
### Example
|
@@ -372,7 +372,7 @@ This is useful to reduce number of consumed jobs, which is limited by [50,000 jo
|
|
372
372
|
|
373
373
|
This plugin originally loads local files into BigQuery in parallel, that is, consumes a number of jobs, say 24 jobs on 24 CPU core machine for example (this depends on embulk parameters such as `min_output_tasks` and `max_threads`).
|
374
374
|
|
375
|
-
BigQuery supports loading multiple files from GCS with one job
|
375
|
+
BigQuery supports loading multiple files from GCS with one job, therefore, uploading local files to GCS in parallel and then loading from GCS into BigQuery reduces number of consumed jobs to 1.
|
376
376
|
|
377
377
|
Using `gcs_bucket` option, such strategy is enabled. You may also use `auto_create_gcs_bucket` to create the specified GCS bucket automatically.
|
378
378
|
|
@@ -444,16 +444,24 @@ $ embulk run -X page_size=1 -b . -l trace example/example.yml
|
|
444
444
|
|
445
445
|
### Run test:
|
446
446
|
|
447
|
+
Place your embulk with `.jar` extension:
|
448
|
+
|
449
|
+
```
|
450
|
+
$ cp -a $(which embulk) embulk.jar
|
451
|
+
```
|
452
|
+
|
453
|
+
Run tests with `env RUBYOPT="-r ./embulk.jar"`:
|
454
|
+
|
447
455
|
```
|
448
|
-
$ bundle exec rake test
|
456
|
+
$ bundle exec env RUBYOPT="-r ./embulk.jar" rake test
|
449
457
|
```
|
450
458
|
|
451
459
|
To run tests which actually connects to BigQuery such as test/test\_bigquery\_client.rb,
|
452
460
|
prepare a json\_keyfile at example/your-project-000.json, then
|
453
461
|
|
454
462
|
```
|
455
|
-
$ bundle exec ruby test/test_bigquery_client.rb
|
456
|
-
$ bundle exec ruby test/test_example.rb
|
463
|
+
$ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_bigquery_client.rb
|
464
|
+
$ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_example.rb
|
457
465
|
```
|
458
466
|
|
459
467
|
### Release gem:
|
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-bigquery"
|
3
|
-
spec.version = "0.4.10"
|
3
|
+
spec.version = "0.4.11"
|
4
4
|
spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
|
5
5
|
spec.summary = "Google BigQuery output plugin for Embulk"
|
6
6
|
spec.description = "Embulk plugin that insert records to Google BigQuery."
|
@@ -15,7 +15,6 @@ Gem::Specification.new do |spec|
|
|
15
15
|
spec.add_dependency 'google-api-client'
|
16
16
|
spec.add_dependency 'time_with_zone'
|
17
17
|
|
18
|
-
spec.add_development_dependency 'embulk', ['>= 0.8.2']
|
19
18
|
spec.add_development_dependency 'bundler', ['>= 1.10.6']
|
20
19
|
spec.add_development_dependency 'rake', ['>= 10.0']
|
21
20
|
end
|
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -330,10 +330,13 @@ module Embulk
|
|
330
330
|
end
|
331
331
|
end
|
332
332
|
|
333
|
-
# cf. http://www.rubydoc.info/github/google/google-api-ruby-client/Google/Apis/BigqueryV2/JobStatus#errors-instance_method
|
334
333
|
# `errors` returns Array<Google::Apis::BigqueryV2::ErrorProto> if any error exists.
|
334
|
+
_errors = _response.status.errors
|
335
|
+
|
336
|
+
# cf. http://www.rubydoc.info/github/google/google-api-ruby-client/Google/Apis/BigqueryV2/JobStatus#errors-instance_method
|
337
|
+
# `error_result` returns Google::Apis::BigqueryV2::ErrorProto if job failed.
|
335
338
|
# Otherwise, this returns nil.
|
336
|
-
if _errors = _response.status.errors
|
339
|
+
if _response.status.error_result
|
337
340
|
msg = "failed during waiting a #{kind} job, get_job(#{@project}, #{job_id}), errors:#{_errors.map(&:to_h)}"
|
338
341
|
if _errors.any? {|error| error.reason == 'backendError' }
|
339
342
|
raise BackendError, msg
|
@@ -347,6 +350,10 @@ module Embulk
|
|
347
350
|
end
|
348
351
|
end
|
349
352
|
|
353
|
+
if _errors
|
354
|
+
Embulk.logger.warn { "embulk-output-bigquery: #{kind} job errors... job_id:[#{job_id}] errors:#{_errors.map(&:to_h)}" }
|
355
|
+
end
|
356
|
+
|
350
357
|
Embulk.logger.info { "embulk-output-bigquery: #{kind} job response... job_id:[#{job_id}] response.statistics:#{_response.statistics.to_h}" }
|
351
358
|
|
352
359
|
_response
|
data/test/helper.rb
CHANGED
@@ -3,9 +3,14 @@
|
|
3
3
|
require 'test/unit'
|
4
4
|
require 'test/unit/rr'
|
5
5
|
|
6
|
-
# require 'embulk/java/bootstrap'
|
7
6
|
require 'embulk'
|
8
|
-
|
7
|
+
begin
|
8
|
+
# Embulk ~> 0.8.x
|
9
|
+
Embulk.setup
|
10
|
+
rescue NotImplementedError
|
11
|
+
# Embulk ~> 0.9.x
|
12
|
+
require 'embulk/java/bootstrap'
|
13
|
+
end
|
9
14
|
Embulk.logger = Embulk::Logger.new('/dev/null')
|
10
15
|
|
11
16
|
APP_ROOT = File.expand_path('../', __dir__)
|
metadata
CHANGED
@@ -1,81 +1,67 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.10
|
4
|
+
version: 0.4.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
- Naotoshi Seo
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2019-03-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name: google-api-client
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
16
|
requirements:
|
18
17
|
- - ">="
|
19
18
|
- !ruby/object:Gem::Version
|
20
19
|
version: '0'
|
21
|
-
|
20
|
+
name: google-api-client
|
22
21
|
prerelease: false
|
22
|
+
type: :runtime
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '0'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
|
-
name: time_with_zone
|
30
29
|
requirement: !ruby/object:Gem::Requirement
|
31
30
|
requirements:
|
32
31
|
- - ">="
|
33
32
|
- !ruby/object:Gem::Version
|
34
33
|
version: '0'
|
35
|
-
|
34
|
+
name: time_with_zone
|
36
35
|
prerelease: false
|
36
|
+
type: :runtime
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
|
-
name: embulk
|
44
|
-
requirement: !ruby/object:Gem::Requirement
|
45
|
-
requirements:
|
46
|
-
- - ">="
|
47
|
-
- !ruby/object:Gem::Version
|
48
|
-
version: 0.8.2
|
49
|
-
type: :development
|
50
|
-
prerelease: false
|
51
|
-
version_requirements: !ruby/object:Gem::Requirement
|
52
|
-
requirements:
|
53
|
-
- - ">="
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version: 0.8.2
|
56
|
-
- !ruby/object:Gem::Dependency
|
57
|
-
name: bundler
|
58
43
|
requirement: !ruby/object:Gem::Requirement
|
59
44
|
requirements:
|
60
45
|
- - ">="
|
61
46
|
- !ruby/object:Gem::Version
|
62
47
|
version: 1.10.6
|
63
|
-
|
48
|
+
name: bundler
|
64
49
|
prerelease: false
|
50
|
+
type: :development
|
65
51
|
version_requirements: !ruby/object:Gem::Requirement
|
66
52
|
requirements:
|
67
53
|
- - ">="
|
68
54
|
- !ruby/object:Gem::Version
|
69
55
|
version: 1.10.6
|
70
56
|
- !ruby/object:Gem::Dependency
|
71
|
-
name: rake
|
72
57
|
requirement: !ruby/object:Gem::Requirement
|
73
58
|
requirements:
|
74
59
|
- - ">="
|
75
60
|
- !ruby/object:Gem::Version
|
76
61
|
version: '10.0'
|
77
|
-
|
62
|
+
name: rake
|
78
63
|
prerelease: false
|
64
|
+
type: :development
|
79
65
|
version_requirements: !ruby/object:Gem::Requirement
|
80
66
|
requirements:
|
81
67
|
- - ">="
|
@@ -159,7 +145,7 @@ homepage: https://github.com/embulk/embulk-output-bigquery
|
|
159
145
|
licenses:
|
160
146
|
- MIT
|
161
147
|
metadata: {}
|
162
|
-
post_install_message:
|
148
|
+
post_install_message:
|
163
149
|
rdoc_options: []
|
164
150
|
require_paths:
|
165
151
|
- lib
|
@@ -174,9 +160,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
174
160
|
- !ruby/object:Gem::Version
|
175
161
|
version: '0'
|
176
162
|
requirements: []
|
177
|
-
rubyforge_project:
|
178
|
-
rubygems_version: 2.
|
179
|
-
signing_key:
|
163
|
+
rubyforge_project:
|
164
|
+
rubygems_version: 2.6.14
|
165
|
+
signing_key:
|
180
166
|
specification_version: 4
|
181
167
|
summary: Google BigQuery output plugin for Embulk
|
182
168
|
test_files:
|