embulk-output-bigquery 0.4.11 → 0.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +4 -2
- data/embulk-output-bigquery.gemspec +1 -1
- data/lib/embulk/output/bigquery.rb +7 -0
- data/lib/embulk/output/bigquery/bigquery_client.rb +8 -1
- data/test/test_configure.rb +9 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4be15715120c9820fd039cd694a07a09826235d2
|
4
|
+
data.tar.gz: e12dc361afd90c250a19ec186d559fc7eeb6ee85
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 755ed9ddae0b079c84e57f49b5c852138fe4b441c2b49e0ad5ab6343ff6a785356078a8955600a34143716da1c758519d8903a841cb7eec2a989b3eeaa6e51fb
|
7
|
+
data.tar.gz: e659fddc3c350df529845659e28d75a0c856e69ff35d2c78a99035c926f163d01d48b3e1d11ccdf7f46942aef616dac910c58c48c49f56ccb3b2338259c6d61d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.4.13 - 2019-03-20
|
2
|
+
|
3
|
+
* [enhancement] Support clustered table as an experimental feature
|
4
|
+
|
5
|
+
## 0.4.12 - 2019-03-20
|
6
|
+
|
7
|
+
* [maintenance] Fix `time_partitioning.requirePartitionFilter` was not working. Use `time_partitioning.require_partition_filter` (thanks to @gitetsu)
|
8
|
+
|
1
9
|
## 0.4.11 - 2019-03-07
|
2
10
|
|
3
11
|
* [maintenance] Fix to use `response.status.error_result` instead of `response.status.errors` to check job failure status (thanks to @nownabe)
|
data/README.md
CHANGED
@@ -107,7 +107,9 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
|
|
107
107
|
| time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
|
108
108
|
| time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
|
109
109
|
| time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
|
110
|
-
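| time_partitioning.requirePartitionFilter | boolean | optional | nil | If true, valid partition filter is required when query |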
|
110
|
+
| time_partitioning.require_partition_filter | boolean | optional | nil | If true, valid partition filter is required when query |
|
111
|
+
| clustering | hash | optional | nil | (Experimental) Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. NOTE: **clustered tables** is a beta release. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
|
112
|
+
| clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
|
111
113
|
| schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
|
112
114
|
|
113
115
|
### Example
|
@@ -466,7 +468,7 @@ $ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_example.rb
|
|
466
468
|
|
467
469
|
### Release gem:
|
468
470
|
|
469
|
-
|
471
|
+
Change the version of gemspec, and write CHANGELOG.md. Then,
|
470
472
|
|
471
473
|
```
|
472
474
|
$ bundle exec rake release
|
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-bigquery"
|
3
|
-
spec.version = "0.4.11"
|
3
|
+
spec.version = "0.4.13"
|
4
4
|
spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
|
5
5
|
spec.summary = "Google BigQuery output plugin for Embulk"
|
6
6
|
spec.description = "Embulk plugin that insert records to Google BigQuery."
|
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -89,6 +89,7 @@ module Embulk
|
|
89
89
|
'ignore_unknown_values' => config.param('ignore_unknown_values', :bool, :default => false),
|
90
90
|
'allow_quoted_newlines' => config.param('allow_quoted_newlines', :bool, :default => false),
|
91
91
|
'time_partitioning' => config.param('time_partitioning', :hash, :default => nil),
|
92
|
+
'clustering' => config.param('clustering', :hash, :default => nil), # google-api-ruby-client >= v0.21.0
|
92
93
|
'schema_update_options' => config.param('schema_update_options', :array, :default => nil),
|
93
94
|
|
94
95
|
# for debug
|
@@ -234,6 +235,12 @@ module Embulk
|
|
234
235
|
task['time_partitioning'] = {'type' => 'DAY'}
|
235
236
|
end
|
236
237
|
|
238
|
+
if task['clustering']
|
239
|
+
unless task['clustering']['fields']
|
240
|
+
raise ConfigError.new "`clustering` must have `fields` key"
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
237
244
|
if task['schema_update_options']
|
238
245
|
task['schema_update_options'].each do |schema_update_option|
|
239
246
|
unless %w[ALLOW_FIELD_ADDITION ALLOW_FIELD_RELAXATION].include?(schema_update_option)
|
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -437,7 +437,14 @@ module Embulk
|
|
437
437
|
type: options['time_partitioning']['type'],
|
438
438
|
expiration_ms: options['time_partitioning']['expiration_ms'],
|
439
439
|
field: options['time_partitioning']['field'],
|
440
|
-
|
440
|
+
require_partition_filter: options['time_partitioning']['require_partition_filter'],
|
441
|
+
}
|
442
|
+
end
|
443
|
+
|
444
|
+
options['clustering'] ||= @task['clustering']
|
445
|
+
if options['clustering']
|
446
|
+
body[:clustering] = {
|
447
|
+
fields: options['clustering']['fields'],
|
441
448
|
}
|
442
449
|
end
|
443
450
|
|
data/test/test_configure.rb
CHANGED
@@ -84,6 +84,7 @@ module Embulk
|
|
84
84
|
assert_equal false, task['ignore_unknown_values']
|
85
85
|
assert_equal false, task['allow_quoted_newlines']
|
86
86
|
assert_equal nil, task['time_partitioning']
|
87
|
+
assert_equal nil, task['clustering']
|
87
88
|
assert_equal false, task['skip_load']
|
88
89
|
end
|
89
90
|
|
@@ -277,6 +278,14 @@ module Embulk
|
|
277
278
|
assert_equal 'DAY', task['time_partitioning']['type']
|
278
279
|
end
|
279
280
|
|
281
|
+
def test_clustering
|
282
|
+
config = least_config.merge('clustering' => {'fields' => ['field_a']})
|
283
|
+
assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
|
284
|
+
|
285
|
+
config = least_config.merge('clustering' => {})
|
286
|
+
assert_raise { Bigquery.configure(config, schema, processor_count) }
|
287
|
+
end
|
288
|
+
|
280
289
|
def test_schema_update_options
|
281
290
|
config = least_config.merge('schema_update_options' => ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION'])
|
282
291
|
assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.11
|
4
|
+
version: 0.4.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-03-07 00:00:00.000000000 Z
|
12
|
+
date: 2019-03-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -161,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
161
161
|
version: '0'
|
162
162
|
requirements: []
|
163
163
|
rubyforge_project:
|
164
|
-
rubygems_version: 2.6.14
|
164
|
+
rubygems_version: 2.6.14.1
|
165
165
|
signing_key:
|
166
166
|
specification_version: 4
|
167
167
|
summary: Google BigQuery output plugin for Embulk
|