embulk-output-bigquery 0.4.11 → 0.4.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 17069f10356213861511a440c26f35b1e2bedc2c
4
- data.tar.gz: 931cc6a396ca4497c04edf9248d40d4a4a5d5669
3
+ metadata.gz: 4be15715120c9820fd039cd694a07a09826235d2
4
+ data.tar.gz: e12dc361afd90c250a19ec186d559fc7eeb6ee85
5
5
  SHA512:
6
- metadata.gz: 216f4ceefdd3e3a6fc3ca8e58f93d32facf66af350b861e065695a4d2acb0ffa888a9705f40ba2a3570fadae91af1e6601b6a191c4e7a5a41144bd08528b7a9f
7
- data.tar.gz: ba57e3df7be7a4b8cc320dd3a61ab3db77aece068132ec0238b94da3dd417b2df9e4b2e3f754f75036b628ce583c75935b8881a49d297347312314b4afca0073
6
+ metadata.gz: 755ed9ddae0b079c84e57f49b5c852138fe4b441c2b49e0ad5ab6343ff6a785356078a8955600a34143716da1c758519d8903a841cb7eec2a989b3eeaa6e51fb
7
+ data.tar.gz: e659fddc3c350df529845659e28d75a0c856e69ff35d2c78a99035c926f163d01d48b3e1d11ccdf7f46942aef616dac910c58c48c49f56ccb3b2338259c6d61d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.4.13 - 2019-03-20
2
+
3
+ * [enhancement] Support clustered table as an experimental feature
4
+
5
+ ## 0.4.12 - 2019-03-20
6
+
7
+ * [maintenance] Fix `time_partitioning.requirePartitionFilter` not working; use `time_partitioning.require_partition_filter` instead (thanks to @gitetsu)
8
+
1
9
  ## 0.4.11 - 2019-03-07
2
10
 
3
11
  * [maintenance] Fix to use `response.status.error_result` instead of `response.status.errors` to check job failure status (thanks to @nownabe)
data/README.md CHANGED
@@ -107,7 +107,9 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
107
107
  | time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
108
108
  | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
109
109
  | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
110
- | time_partitioning.requirePartitionFilter | boolean | optional | nil | If true, valid partition filter is required when query |
110
+ | time_partitioning.require_partition_filter | boolean | optional | nil | If true, a valid partition filter is required when querying the table |
111
+ | clustering | hash | optional | nil | (Experimental) Clustering is currently supported only for partitioned tables, so it must be used together with the `time_partitioning` option. NOTE: **clustered tables** are a beta feature. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
112
+ | clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
111
113
  | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
112
114
 
113
115
  ### Example
@@ -466,7 +468,7 @@ $ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_example.rb
466
468
 
467
469
  ### Release gem:
468
470
 
469
- Fix gemspec, then
471
+ Change the version of gemspec, and write CHANGELOG.md. Then,
470
472
 
471
473
  ```
472
474
  $ bundle exec rake release
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-bigquery"
3
- spec.version = "0.4.11"
3
+ spec.version = "0.4.13"
4
4
  spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
5
5
  spec.summary = "Google BigQuery output plugin for Embulk"
6
6
  spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -89,6 +89,7 @@ module Embulk
89
89
  'ignore_unknown_values' => config.param('ignore_unknown_values', :bool, :default => false),
90
90
  'allow_quoted_newlines' => config.param('allow_quoted_newlines', :bool, :default => false),
91
91
  'time_partitioning' => config.param('time_partitioning', :hash, :default => nil),
92
+ 'clustering' => config.param('clustering', :hash, :default => nil), # google-api-ruby-client >= v0.21.0
92
93
  'schema_update_options' => config.param('schema_update_options', :array, :default => nil),
93
94
 
94
95
  # for debug
@@ -234,6 +235,12 @@ module Embulk
234
235
  task['time_partitioning'] = {'type' => 'DAY'}
235
236
  end
236
237
 
238
+ if task['clustering']
239
+ unless task['clustering']['fields']
240
+ raise ConfigError.new "`clustering` must have `fields` key"
241
+ end
242
+ end
243
+
237
244
  if task['schema_update_options']
238
245
  task['schema_update_options'].each do |schema_update_option|
239
246
  unless %w[ALLOW_FIELD_ADDITION ALLOW_FIELD_RELAXATION].include?(schema_update_option)
@@ -437,7 +437,14 @@ module Embulk
437
437
  type: options['time_partitioning']['type'],
438
438
  expiration_ms: options['time_partitioning']['expiration_ms'],
439
439
  field: options['time_partitioning']['field'],
440
- requirePartitionFilter: options['time_partitioning']['requirePartitionFilter'],
440
+ require_partition_filter: options['time_partitioning']['require_partition_filter'],
441
+ }
442
+ end
443
+
444
+ options['clustering'] ||= @task['clustering']
445
+ if options['clustering']
446
+ body[:clustering] = {
447
+ fields: options['clustering']['fields'],
441
448
  }
442
449
  end
443
450
 
@@ -84,6 +84,7 @@ module Embulk
84
84
  assert_equal false, task['ignore_unknown_values']
85
85
  assert_equal false, task['allow_quoted_newlines']
86
86
  assert_equal nil, task['time_partitioning']
87
+ assert_equal nil, task['clustering']
87
88
  assert_equal false, task['skip_load']
88
89
  end
89
90
 
@@ -277,6 +278,14 @@ module Embulk
277
278
  assert_equal 'DAY', task['time_partitioning']['type']
278
279
  end
279
280
 
281
+ def test_clustering
282
+ config = least_config.merge('clustering' => {'fields' => ['field_a']})
283
+ assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
284
+
285
+ config = least_config.merge('clustering' => {})
286
+ assert_raise { Bigquery.configure(config, schema, processor_count) }
287
+ end
288
+
280
289
  def test_schema_update_options
281
290
  config = least_config.merge('schema_update_options' => ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION'])
282
291
  assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.11
4
+ version: 0.4.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-03-06 00:00:00.000000000 Z
12
+ date: 2019-03-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
@@ -161,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
161
161
  version: '0'
162
162
  requirements: []
163
163
  rubyforge_project:
164
- rubygems_version: 2.6.14
164
+ rubygems_version: 2.6.14.1
165
165
  signing_key:
166
166
  specification_version: 4
167
167
  summary: Google BigQuery output plugin for Embulk