embulk-output-bigquery 0.4.11 → 0.4.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +4 -2
- data/embulk-output-bigquery.gemspec +1 -1
- data/lib/embulk/output/bigquery.rb +7 -0
- data/lib/embulk/output/bigquery/bigquery_client.rb +8 -1
- data/test/test_configure.rb +9 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4be15715120c9820fd039cd694a07a09826235d2
|
4
|
+
data.tar.gz: e12dc361afd90c250a19ec186d559fc7eeb6ee85
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 755ed9ddae0b079c84e57f49b5c852138fe4b441c2b49e0ad5ab6343ff6a785356078a8955600a34143716da1c758519d8903a841cb7eec2a989b3eeaa6e51fb
|
7
|
+
data.tar.gz: e659fddc3c350df529845659e28d75a0c856e69ff35d2c78a99035c926f163d01d48b3e1d11ccdf7f46942aef616dac910c58c48c49f56ccb3b2338259c6d61d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.4.13 - 2019-03-20
|
2
|
+
|
3
|
+
* [enhancement] Support clustered table as an experimental feature
|
4
|
+
|
5
|
+
## 0.4.12 - 2019-03-20
|
6
|
+
|
7
|
+
* [maintenance] Fix `time_partitioning.requirePartitionFilter` was not working. Use `time_partitioning.require_partition_filter` (thanks to @gitetsu)
|
8
|
+
|
1
9
|
## 0.4.11 - 2019-03-07
|
2
10
|
|
3
11
|
* [maintenance] Fix to use `response.status.error_result` instead of `response.status.errors` to check job failure status (thanks to @nownabe)
|
data/README.md
CHANGED
@@ -107,7 +107,9 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
|
|
107
107
|
| time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
|
108
108
|
| time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
|
109
109
|
| time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
|
110
|
-
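| time_partitioning.requirePartitionFilter | boolean | optional | nil | If true, valid partition filter is required when query |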
|
110
|
+
| time_partitioning.require_partition_filter | boolean | optional | nil | If true, valid partition filter is required when query |
|
111
|
+
| clustering | hash | optional | nil | (Experimental) Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. NOTE: **clustered tables** is a beta release. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
|
112
|
+
| clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
|
111
113
|
| schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
|
112
114
|
|
113
115
|
### Example
|
@@ -466,7 +468,7 @@ $ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_example.rb
|
|
466
468
|
|
467
469
|
### Release gem:
|
468
470
|
|
469
|
-
|
471
|
+
Change the version of gemspec, and write CHANGELOG.md. Then,
|
470
472
|
|
471
473
|
```
|
472
474
|
$ bundle exec rake release
|
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-bigquery"
|
3
|
-
spec.version = "0.4.11"
|
3
|
+
spec.version = "0.4.13"
|
4
4
|
spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
|
5
5
|
spec.summary = "Google BigQuery output plugin for Embulk"
|
6
6
|
spec.description = "Embulk plugin that insert records to Google BigQuery."
|
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -89,6 +89,7 @@ module Embulk
|
|
89
89
|
'ignore_unknown_values' => config.param('ignore_unknown_values', :bool, :default => false),
|
90
90
|
'allow_quoted_newlines' => config.param('allow_quoted_newlines', :bool, :default => false),
|
91
91
|
'time_partitioning' => config.param('time_partitioning', :hash, :default => nil),
|
92
|
+
'clustering' => config.param('clustering', :hash, :default => nil), # google-api-ruby-client >= v0.21.0
|
92
93
|
'schema_update_options' => config.param('schema_update_options', :array, :default => nil),
|
93
94
|
|
94
95
|
# for debug
|
@@ -234,6 +235,12 @@ module Embulk
|
|
234
235
|
task['time_partitioning'] = {'type' => 'DAY'}
|
235
236
|
end
|
236
237
|
|
238
|
+
if task['clustering']
|
239
|
+
unless task['clustering']['fields']
|
240
|
+
raise ConfigError.new "`clustering` must have `fields` key"
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
237
244
|
if task['schema_update_options']
|
238
245
|
task['schema_update_options'].each do |schema_update_option|
|
239
246
|
unless %w[ALLOW_FIELD_ADDITION ALLOW_FIELD_RELAXATION].include?(schema_update_option)
|
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -437,7 +437,14 @@ module Embulk
|
|
437
437
|
type: options['time_partitioning']['type'],
|
438
438
|
expiration_ms: options['time_partitioning']['expiration_ms'],
|
439
439
|
field: options['time_partitioning']['field'],
|
440
|
-
|
440
|
+
require_partition_filter: options['time_partitioning']['require_partition_filter'],
|
441
|
+
}
|
442
|
+
end
|
443
|
+
|
444
|
+
options['clustering'] ||= @task['clustering']
|
445
|
+
if options['clustering']
|
446
|
+
body[:clustering] = {
|
447
|
+
fields: options['clustering']['fields'],
|
441
448
|
}
|
442
449
|
end
|
443
450
|
|
data/test/test_configure.rb
CHANGED
@@ -84,6 +84,7 @@ module Embulk
|
|
84
84
|
assert_equal false, task['ignore_unknown_values']
|
85
85
|
assert_equal false, task['allow_quoted_newlines']
|
86
86
|
assert_equal nil, task['time_partitioning']
|
87
|
+
assert_equal nil, task['clustering']
|
87
88
|
assert_equal false, task['skip_load']
|
88
89
|
end
|
89
90
|
|
@@ -277,6 +278,14 @@ module Embulk
|
|
277
278
|
assert_equal 'DAY', task['time_partitioning']['type']
|
278
279
|
end
|
279
280
|
|
281
|
+
def test_clustering
|
282
|
+
config = least_config.merge('clustering' => {'fields' => ['field_a']})
|
283
|
+
assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
|
284
|
+
|
285
|
+
config = least_config.merge('clustering' => {})
|
286
|
+
assert_raise { Bigquery.configure(config, schema, processor_count) }
|
287
|
+
end
|
288
|
+
|
280
289
|
def test_schema_update_options
|
281
290
|
config = least_config.merge('schema_update_options' => ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION'])
|
282
291
|
assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.11
|
4
|
+
version: 0.4.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-03-07 00:00:00.000000000 Z
|
12
|
+
date: 2019-03-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -161,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
161
161
|
version: '0'
|
162
162
|
requirements: []
|
163
163
|
rubyforge_project:
|
164
|
-
rubygems_version: 2.6.14
|
164
|
+
rubygems_version: 2.6.14.1
|
165
165
|
signing_key:
|
166
166
|
specification_version: 4
|
167
167
|
summary: Google BigQuery output plugin for Embulk
|