embulk-output-bigquery 0.4.12 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 9fceabdc34780426ea3ceda76c1afe00a4c99115207bb8feaf0151ce9ae48911
4
- data.tar.gz: 708649547b8a6693a12722e376c5c923a6e3f3a17ef180ebed56ecc92d6270b3
2
+ SHA1:
3
+ metadata.gz: 4be15715120c9820fd039cd694a07a09826235d2
4
+ data.tar.gz: e12dc361afd90c250a19ec186d559fc7eeb6ee85
5
5
  SHA512:
6
- metadata.gz: 8d31eb9867c7c70b9eb1b01bfb2889afae9a3693b328391d62ddc74cc552452832d50edb2af8d69285b9468cc99292c72de132f86bf48d2cb1920ab6f2be5fcf
7
- data.tar.gz: 4bcab8f4bf48962985d9904c64530dd45b71dcd2afbc42388f3640636f0c1a971f0fcebda0db8bde2599ab54355210822bcf5de8d2196d42eceb9bca3d6145ae
6
+ metadata.gz: 755ed9ddae0b079c84e57f49b5c852138fe4b441c2b49e0ad5ab6343ff6a785356078a8955600a34143716da1c758519d8903a841cb7eec2a989b3eeaa6e51fb
7
+ data.tar.gz: e659fddc3c350df529845659e28d75a0c856e69ff35d2c78a99035c926f163d01d48b3e1d11ccdf7f46942aef616dac910c58c48c49f56ccb3b2338259c6d61d
@@ -1,3 +1,7 @@
1
+ ## 0.4.13 - 2019-03-20
2
+
3
+ * [enhancement] Support clustered tables as an experimental feature
4
+
1
5
  ## 0.4.12 - 2019-03-20
2
6
 
3
7
  * [maintenance] Fix: `time_partitioning.requirePartitionFilter` was not working; use `time_partitioning.require_partition_filter` instead (thanks to @gitetsu)
data/README.md CHANGED
@@ -108,6 +108,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
108
108
  | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
109
109
  | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
110
110
  | time_partitioning.require_partition_filter | boolean | optional | nil | If true, a valid partition filter is required when querying the table |
111
+ | clustering | hash | optional | nil | (Experimental) Currently, clustering is supported only for partitioned tables, so it must be used with the `time_partitioning` option. NOTE: **clustered tables** are a beta feature. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
112
+ | clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
111
113
  | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION`, `ALLOW_FIELD_RELAXATION`, or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE on the current status: `schema_update_options` does not work for `copy` jobs, so it has no effect in most modes, such as `append`, `replace`, and `replace_backup`. `delete_in_advance` deletes the origin table, so it does not need to update the schema. Only `append_direct` can make use of schema updates. |
112
114
 
113
115
  ### Example
@@ -466,7 +468,7 @@ $ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_example.rb
466
468
 
467
469
  ### Release gem:
468
470
 
469
- Fix gemspec, then
471
+ Update the version in the gemspec and add an entry to CHANGELOG.md. Then,
470
472
 
471
473
  ```
472
474
  $ bundle exec rake release
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-bigquery"
3
- spec.version = "0.4.12"
3
+ spec.version = "0.4.13"
4
4
  spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
5
5
  spec.summary = "Google BigQuery output plugin for Embulk"
6
6
  spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -89,6 +89,7 @@ module Embulk
89
89
  'ignore_unknown_values' => config.param('ignore_unknown_values', :bool, :default => false),
90
90
  'allow_quoted_newlines' => config.param('allow_quoted_newlines', :bool, :default => false),
91
91
  'time_partitioning' => config.param('time_partitioning', :hash, :default => nil),
92
+ 'clustering' => config.param('clustering', :hash, :default => nil), # google-api-ruby-client >= v0.21.0
92
93
  'schema_update_options' => config.param('schema_update_options', :array, :default => nil),
93
94
 
94
95
  # for debug
@@ -234,6 +235,12 @@ module Embulk
234
235
  task['time_partitioning'] = {'type' => 'DAY'}
235
236
  end
236
237
 
238
+ if task['clustering']
239
+ unless task['clustering']['fields']
240
+ raise ConfigError.new "`clustering` must have `fields` key"
241
+ end
242
+ end
243
+
237
244
  if task['schema_update_options']
238
245
  task['schema_update_options'].each do |schema_update_option|
239
246
  unless %w[ALLOW_FIELD_ADDITION ALLOW_FIELD_RELAXATION].include?(schema_update_option)
@@ -441,6 +441,13 @@ module Embulk
441
441
  }
442
442
  end
443
443
 
444
+ options['clustering'] ||= @task['clustering']
445
+ if options['clustering']
446
+ body[:clustering] = {
447
+ fields: options['clustering']['fields'],
448
+ }
449
+ end
450
+
444
451
  opts = {}
445
452
  Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
446
453
  with_network_retry { client.insert_table(@project, dataset, body, opts) }
@@ -84,6 +84,7 @@ module Embulk
84
84
  assert_equal false, task['ignore_unknown_values']
85
85
  assert_equal false, task['allow_quoted_newlines']
86
86
  assert_equal nil, task['time_partitioning']
87
+ assert_equal nil, task['clustering']
87
88
  assert_equal false, task['skip_load']
88
89
  end
89
90
 
@@ -277,6 +278,14 @@ module Embulk
277
278
  assert_equal 'DAY', task['time_partitioning']['type']
278
279
  end
279
280
 
281
+ def test_clustering
282
+ config = least_config.merge('clustering' => {'fields' => ['field_a']})
283
+ assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
284
+
285
+ config = least_config.merge('clustering' => {})
286
+ assert_raise { Bigquery.configure(config, schema, processor_count) }
287
+ end
288
+
280
289
  def test_schema_update_options
281
290
  config = least_config.merge('schema_update_options' => ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION'])
282
291
  assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.12
4
+ version: 0.4.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
@@ -160,7 +160,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
160
160
  - !ruby/object:Gem::Version
161
161
  version: '0'
162
162
  requirements: []
163
- rubygems_version: 3.0.3
163
+ rubyforge_project:
164
+ rubygems_version: 2.6.14.1
164
165
  signing_key:
165
166
  specification_version: 4
166
167
  summary: Google BigQuery output plugin for Embulk