embulk-output-bigquery 0.4.11 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 17069f10356213861511a440c26f35b1e2bedc2c
4
- data.tar.gz: 931cc6a396ca4497c04edf9248d40d4a4a5d5669
3
+ metadata.gz: 4be15715120c9820fd039cd694a07a09826235d2
4
+ data.tar.gz: e12dc361afd90c250a19ec186d559fc7eeb6ee85
5
5
  SHA512:
6
- metadata.gz: 216f4ceefdd3e3a6fc3ca8e58f93d32facf66af350b861e065695a4d2acb0ffa888a9705f40ba2a3570fadae91af1e6601b6a191c4e7a5a41144bd08528b7a9f
7
- data.tar.gz: ba57e3df7be7a4b8cc320dd3a61ab3db77aece068132ec0238b94da3dd417b2df9e4b2e3f754f75036b628ce583c75935b8881a49d297347312314b4afca0073
6
+ metadata.gz: 755ed9ddae0b079c84e57f49b5c852138fe4b441c2b49e0ad5ab6343ff6a785356078a8955600a34143716da1c758519d8903a841cb7eec2a989b3eeaa6e51fb
7
+ data.tar.gz: e659fddc3c350df529845659e28d75a0c856e69ff35d2c78a99035c926f163d01d48b3e1d11ccdf7f46942aef616dac910c58c48c49f56ccb3b2338259c6d61d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.4.13 - 2019-03-20
2
+
3
+ * [enhancement] Support clustered table as an experimental feature
4
+
5
+ ## 0.4.12 - 2019-03-20
6
+
7
+ * [maintenance] Fix `time_partitioning.requirePartitionFilter` not working; use `time_partitioning.require_partition_filter` instead (thanks to @gitetsu)
8
+
1
9
  ## 0.4.11 - 2019-03-07
2
10
 
3
11
  * [maintenance] Fix to use `response.status.error_result` instead of `response.status.errors` to check job failure status (thanks to @nownabe)
data/README.md CHANGED
@@ -107,7 +107,9 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
107
107
  | time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
108
108
  | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
109
109
  | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
110
- | time_partitioning.requirePartitionFilter | boolean | optional | nil | If true, valid partition filter is required when query |
110
+ | time_partitioning.require_partition_filter | boolean | optional | nil | If true, a valid partition filter is required when querying the table |
111
+ | clustering | hash | optional | nil | (Experimental) Currently, clustering is supported for partitioned tables, so it must be used together with the `time_partitioning` option. NOTE: the **clustered tables** feature is a beta release. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
112
+ | clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
111
113
  | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
112
114
 
113
115
  ### Example
@@ -466,7 +468,7 @@ $ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_example.rb
466
468
 
467
469
  ### Release gem:
468
470
 
469
- Fix gemspec, then
471
+ Update the version in the gemspec and add an entry to CHANGELOG.md. Then,
470
472
 
471
473
  ```
472
474
  $ bundle exec rake release
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-bigquery"
3
- spec.version = "0.4.11"
3
+ spec.version = "0.4.13"
4
4
  spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
5
5
  spec.summary = "Google BigQuery output plugin for Embulk"
6
6
  spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -89,6 +89,7 @@ module Embulk
89
89
  'ignore_unknown_values' => config.param('ignore_unknown_values', :bool, :default => false),
90
90
  'allow_quoted_newlines' => config.param('allow_quoted_newlines', :bool, :default => false),
91
91
  'time_partitioning' => config.param('time_partitioning', :hash, :default => nil),
92
+ 'clustering' => config.param('clustering', :hash, :default => nil), # google-api-ruby-client >= v0.21.0
92
93
  'schema_update_options' => config.param('schema_update_options', :array, :default => nil),
93
94
 
94
95
  # for debug
@@ -234,6 +235,12 @@ module Embulk
234
235
  task['time_partitioning'] = {'type' => 'DAY'}
235
236
  end
236
237
 
238
+ if task['clustering']
239
+ unless task['clustering']['fields']
240
+ raise ConfigError.new "`clustering` must have `fields` key"
241
+ end
242
+ end
243
+
237
244
  if task['schema_update_options']
238
245
  task['schema_update_options'].each do |schema_update_option|
239
246
  unless %w[ALLOW_FIELD_ADDITION ALLOW_FIELD_RELAXATION].include?(schema_update_option)
@@ -437,7 +437,14 @@ module Embulk
437
437
  type: options['time_partitioning']['type'],
438
438
  expiration_ms: options['time_partitioning']['expiration_ms'],
439
439
  field: options['time_partitioning']['field'],
440
- requirePartitionFilter: options['time_partitioning']['requirePartitionFilter'],
440
+ require_partition_filter: options['time_partitioning']['require_partition_filter'],
441
+ }
442
+ end
443
+
444
+ options['clustering'] ||= @task['clustering']
445
+ if options['clustering']
446
+ body[:clustering] = {
447
+ fields: options['clustering']['fields'],
441
448
  }
442
449
  end
443
450
 
@@ -84,6 +84,7 @@ module Embulk
84
84
  assert_equal false, task['ignore_unknown_values']
85
85
  assert_equal false, task['allow_quoted_newlines']
86
86
  assert_equal nil, task['time_partitioning']
87
+ assert_equal nil, task['clustering']
87
88
  assert_equal false, task['skip_load']
88
89
  end
89
90
 
@@ -277,6 +278,14 @@ module Embulk
277
278
  assert_equal 'DAY', task['time_partitioning']['type']
278
279
  end
279
280
 
281
+ def test_clustering
282
+ config = least_config.merge('clustering' => {'fields' => ['field_a']})
283
+ assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
284
+
285
+ config = least_config.merge('clustering' => {})
286
+ assert_raise { Bigquery.configure(config, schema, processor_count) }
287
+ end
288
+
280
289
  def test_schema_update_options
281
290
  config = least_config.merge('schema_update_options' => ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION'])
282
291
  assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.11
4
+ version: 0.4.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-03-06 00:00:00.000000000 Z
12
+ date: 2019-03-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
@@ -161,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
161
161
  version: '0'
162
162
  requirements: []
163
163
  rubyforge_project:
164
- rubygems_version: 2.6.14
164
+ rubygems_version: 2.6.14.1
165
165
  signing_key:
166
166
  specification_version: 4
167
167
  summary: Google BigQuery output plugin for Embulk