RubyGems - embulk-output-bigquery - Versions diffs - 0.4.11 → 0.4.13 - Mend

embulk-output-bigquery 0.4.11 → 0.4.13

Files changed (8)

checksums.yaml +4 -4
data/CHANGELOG.md +8 -0
data/README.md +4 -2
data/embulk-output-bigquery.gemspec +1 -1
data/lib/embulk/output/bigquery.rb +7 -0
data/lib/embulk/output/bigquery/bigquery_client.rb +8 -1
data/test/test_configure.rb +9 -0
metadata +3 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 17069f10356213861511a440c26f35b1e2bedc2c
-  data.tar.gz: 931cc6a396ca4497c04edf9248d40d4a4a5d5669
+  metadata.gz: 4be15715120c9820fd039cd694a07a09826235d2
+  data.tar.gz: e12dc361afd90c250a19ec186d559fc7eeb6ee85
 SHA512:
-  metadata.gz: 216f4ceefdd3e3a6fc3ca8e58f93d32facf66af350b861e065695a4d2acb0ffa888a9705f40ba2a3570fadae91af1e6601b6a191c4e7a5a41144bd08528b7a9f
-  data.tar.gz: ba57e3df7be7a4b8cc320dd3a61ab3db77aece068132ec0238b94da3dd417b2df9e4b2e3f754f75036b628ce583c75935b8881a49d297347312314b4afca0073
+  metadata.gz: 755ed9ddae0b079c84e57f49b5c852138fe4b441c2b49e0ad5ab6343ff6a785356078a8955600a34143716da1c758519d8903a841cb7eec2a989b3eeaa6e51fb
+  data.tar.gz: e659fddc3c350df529845659e28d75a0c856e69ff35d2c78a99035c926f163d01d48b3e1d11ccdf7f46942aef616dac910c58c48c49f56ccb3b2338259c6d61d

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,11 @@
+## 0.4.13 - 2019-03-20
+* [enhancement] Support clustered table as an experimental feature
+## 0.4.12 - 2019-03-20
+* [maintenance] Fix `time_partitioning.requirePartitionFilter` was not working. Use `time_partitioning.require_partition_filter` (thanks to @gitetsu)
 ## 0.4.11 - 2019-03-07
 * [maintenance] Fix to use `response.status.error_result` instead of `response.status.errors` to check job failure status (thanks to @nownabe)

data/README.md CHANGED Viewed

@@ -107,7 +107,9 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 |  time_partitioning.type           | string   | required  | nil     | The only type supported is DAY, which will generate one partition per day based on data loading time. |
 |  time_partitioning.expiration_ms  | int      | optional  | nil     | Number of milliseconds for which to keep the storage for a partition. |
 |  time_partitioning.field          | string   | optional  | nil     | `DATE` or `TIMESTAMP` column used for partitioning |
-|  time_partitioning.requirePartitionFilter | boolean      | optional  | nil     | If true, valid partition filter is required when query |
+|  time_partitioning.require_partition_filter | boolean      | optional  | nil     | If true, valid partition filter is required when query |
+|  clustering                       | hash     | optional  | nil     | (Experimental) Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. NOTE: **clustered tables** is a beta release. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
+|  clustering.fields                | array    | required  | nil     | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
 |  schema_update_options            | array    | optional  | nil     | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
 ### Example
@@ -466,7 +468,7 @@ $ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_example.rb
 ### Release gem:
-Fix gemspec, then
+Change the version of gemspec, and write CHANGELOG.md. Then,
 ```
 $ bundle exec rake release

data/embulk-output-bigquery.gemspec CHANGED Viewed

@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name          = "embulk-output-bigquery"
-  spec.version       = "0.4.11"
+  spec.version       = "0.4.13"
   spec.authors       = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary       = "Google BigQuery output plugin for Embulk"
   spec.description   = "Embulk plugin that insert records to Google BigQuery."

data/lib/embulk/output/bigquery.rb CHANGED Viewed

@@ -89,6 +89,7 @@ module Embulk
           'ignore_unknown_values'          => config.param('ignore_unknown_values',          :bool,    :default => false),
           'allow_quoted_newlines'          => config.param('allow_quoted_newlines',          :bool,    :default => false),
           'time_partitioning'              => config.param('time_partitioning',              :hash,    :default => nil),
+          'clustering'                     => config.param('clustering',                     :hash,    :default => nil), # google-api-ruby-client >= v0.21.0
           'schema_update_options'          => config.param('schema_update_options',          :array,   :default => nil),
           # for debug
@@ -234,6 +235,12 @@ module Embulk
           task['time_partitioning'] = {'type' => 'DAY'}
         end
+        if task['clustering']
+          unless task['clustering']['fields']
+            raise ConfigError.new "`clustering` must have `fields` key"
+          end
+        end
         if task['schema_update_options']
           task['schema_update_options'].each do |schema_update_option|
             unless %w[ALLOW_FIELD_ADDITION ALLOW_FIELD_RELAXATION].include?(schema_update_option)

data/lib/embulk/output/bigquery/bigquery_client.rb CHANGED Viewed

@@ -437,7 +437,14 @@ module Embulk
                 type: options['time_partitioning']['type'],
                 expiration_ms: options['time_partitioning']['expiration_ms'],
                 field: options['time_partitioning']['field'],
-                requirePartitionFilter: options['time_partitioning']['requirePartitionFilter'],
+                require_partition_filter: options['time_partitioning']['require_partition_filter'],
+              }
+            end
+            options['clustering'] ||= @task['clustering']
+            if options['clustering']
+              body[:clustering] = {
+                fields: options['clustering']['fields'],
               }
             end

data/test/test_configure.rb CHANGED Viewed

@@ -84,6 +84,7 @@ module Embulk
         assert_equal false, task['ignore_unknown_values']
         assert_equal false, task['allow_quoted_newlines']
         assert_equal nil, task['time_partitioning']
+        assert_equal nil, task['clustering']
         assert_equal false, task['skip_load']
       end
@@ -277,6 +278,14 @@ module Embulk
         assert_equal 'DAY', task['time_partitioning']['type']
       end
+      def test_clustering
+        config = least_config.merge('clustering' => {'fields' => ['field_a']})
+        assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+        config = least_config.merge('clustering' => {})
+        assert_raise { Bigquery.configure(config, schema, processor_count) }
+      end
       def test_schema_update_options
         config = least_config.merge('schema_update_options' => ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION'])
         assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.4.11
+  version: 0.4.13
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-03-06 00:00:00.000000000 Z
+date: 2019-03-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -161,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.14
+rubygems_version: 2.6.14.1
 signing_key:
 specification_version: 4
 summary: Google BigQuery output plugin for Embulk