embulk-output-bigquery 0.4.12 → 0.4.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 9fceabdc34780426ea3ceda76c1afe00a4c99115207bb8feaf0151ce9ae48911
4
- data.tar.gz: 708649547b8a6693a12722e376c5c923a6e3f3a17ef180ebed56ecc92d6270b3
2
+ SHA1:
3
+ metadata.gz: 4be15715120c9820fd039cd694a07a09826235d2
4
+ data.tar.gz: e12dc361afd90c250a19ec186d559fc7eeb6ee85
5
5
  SHA512:
6
- metadata.gz: 8d31eb9867c7c70b9eb1b01bfb2889afae9a3693b328391d62ddc74cc552452832d50edb2af8d69285b9468cc99292c72de132f86bf48d2cb1920ab6f2be5fcf
7
- data.tar.gz: 4bcab8f4bf48962985d9904c64530dd45b71dcd2afbc42388f3640636f0c1a971f0fcebda0db8bde2599ab54355210822bcf5de8d2196d42eceb9bca3d6145ae
6
+ metadata.gz: 755ed9ddae0b079c84e57f49b5c852138fe4b441c2b49e0ad5ab6343ff6a785356078a8955600a34143716da1c758519d8903a841cb7eec2a989b3eeaa6e51fb
7
+ data.tar.gz: e659fddc3c350df529845659e28d75a0c856e69ff35d2c78a99035c926f163d01d48b3e1d11ccdf7f46942aef616dac910c58c48c49f56ccb3b2338259c6d61d
@@ -1,3 +1,7 @@
1
+ ## 0.4.13 - 2019-03-20
2
+
3
+ * [enhancement] Support clustered table as an experimental feature
4
+
1
5
  ## 0.4.12 - 2019-03-20
2
6
 
3
7
  * [maintenance] Fix `time_partitioning.requirePartitionFilter` not working; use `time_partitioning.require_partition_filter` instead (thanks to @gitetsu)
data/README.md CHANGED
@@ -108,6 +108,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
108
108
  | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
109
109
  | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
110
110
  | time_partitioning.require_partition_filter | boolean | optional | nil | If true, a valid partition filter is required when querying the table |
111
+ | clustering | hash | optional | nil | (Experimental) Currently, clustering is supported only for partitioned tables, so it must be used with the `time_partitioning` option. NOTE: **clustered tables** are a beta feature. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
112
+ | clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
111
113
  | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
112
114
 
113
115
  ### Example
@@ -466,7 +468,7 @@ $ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_example.rb
466
468
 
467
469
  ### Release gem:
468
470
 
469
- Fix gemspec, then
471
+ Update the version in the gemspec and add an entry to CHANGELOG.md. Then,
470
472
 
471
473
  ```
472
474
  $ bundle exec rake release
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-bigquery"
3
- spec.version = "0.4.12"
3
+ spec.version = "0.4.13"
4
4
  spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
5
5
  spec.summary = "Google BigQuery output plugin for Embulk"
6
6
  spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -89,6 +89,7 @@ module Embulk
89
89
  'ignore_unknown_values' => config.param('ignore_unknown_values', :bool, :default => false),
90
90
  'allow_quoted_newlines' => config.param('allow_quoted_newlines', :bool, :default => false),
91
91
  'time_partitioning' => config.param('time_partitioning', :hash, :default => nil),
92
+ 'clustering' => config.param('clustering', :hash, :default => nil), # google-api-ruby-client >= v0.21.0
92
93
  'schema_update_options' => config.param('schema_update_options', :array, :default => nil),
93
94
 
94
95
  # for debug
@@ -234,6 +235,12 @@ module Embulk
234
235
  task['time_partitioning'] = {'type' => 'DAY'}
235
236
  end
236
237
 
238
+ if task['clustering']
239
+ unless task['clustering']['fields']
240
+ raise ConfigError.new "`clustering` must have `fields` key"
241
+ end
242
+ end
243
+
237
244
  if task['schema_update_options']
238
245
  task['schema_update_options'].each do |schema_update_option|
239
246
  unless %w[ALLOW_FIELD_ADDITION ALLOW_FIELD_RELAXATION].include?(schema_update_option)
@@ -441,6 +441,13 @@ module Embulk
441
441
  }
442
442
  end
443
443
 
444
+ options['clustering'] ||= @task['clustering']
445
+ if options['clustering']
446
+ body[:clustering] = {
447
+ fields: options['clustering']['fields'],
448
+ }
449
+ end
450
+
444
451
  opts = {}
445
452
  Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{@location_for_log}, #{body}, #{opts})" }
446
453
  with_network_retry { client.insert_table(@project, dataset, body, opts) }
@@ -84,6 +84,7 @@ module Embulk
84
84
  assert_equal false, task['ignore_unknown_values']
85
85
  assert_equal false, task['allow_quoted_newlines']
86
86
  assert_equal nil, task['time_partitioning']
87
+ assert_equal nil, task['clustering']
87
88
  assert_equal false, task['skip_load']
88
89
  end
89
90
 
@@ -277,6 +278,14 @@ module Embulk
277
278
  assert_equal 'DAY', task['time_partitioning']['type']
278
279
  end
279
280
 
281
+ def test_clustering
282
+ config = least_config.merge('clustering' => {'fields' => ['field_a']})
283
+ assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
284
+
285
+ config = least_config.merge('clustering' => {})
286
+ assert_raise { Bigquery.configure(config, schema, processor_count) }
287
+ end
288
+
280
289
  def test_schema_update_options
281
290
  config = least_config.merge('schema_update_options' => ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION'])
282
291
  assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.12
4
+ version: 0.4.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
@@ -160,7 +160,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
160
160
  - !ruby/object:Gem::Version
161
161
  version: '0'
162
162
  requirements: []
163
- rubygems_version: 3.0.3
163
+ rubyforge_project:
164
+ rubygems_version: 2.6.14.1
164
165
  signing_key:
165
166
  specification_version: 4
166
167
  summary: Google BigQuery output plugin for Embulk