RubyGems - embulk-output-bigquery - Versions diffs - 0.4.11 → 0.4.13 - Mend

embulk-output-bigquery 0.4.11 → 0.4.13

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

checksums.yaml +4 -4
data/CHANGELOG.md +8 -0
data/README.md +4 -2
data/embulk-output-bigquery.gemspec +1 -1
data/lib/embulk/output/bigquery.rb +7 -0
data/lib/embulk/output/bigquery/bigquery_client.rb +8 -1
data/test/test_configure.rb +9 -0
metadata +3 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 17069f10356213861511a440c26f35b1e2bedc2c
-  data.tar.gz: 931cc6a396ca4497c04edf9248d40d4a4a5d5669
+  metadata.gz: 4be15715120c9820fd039cd694a07a09826235d2
+  data.tar.gz: e12dc361afd90c250a19ec186d559fc7eeb6ee85
 SHA512:
-  metadata.gz: 216f4ceefdd3e3a6fc3ca8e58f93d32facf66af350b861e065695a4d2acb0ffa888a9705f40ba2a3570fadae91af1e6601b6a191c4e7a5a41144bd08528b7a9f
-  data.tar.gz: ba57e3df7be7a4b8cc320dd3a61ab3db77aece068132ec0238b94da3dd417b2df9e4b2e3f754f75036b628ce583c75935b8881a49d297347312314b4afca0073
+  metadata.gz: 755ed9ddae0b079c84e57f49b5c852138fe4b441c2b49e0ad5ab6343ff6a785356078a8955600a34143716da1c758519d8903a841cb7eec2a989b3eeaa6e51fb
+  data.tar.gz: e659fddc3c350df529845659e28d75a0c856e69ff35d2c78a99035c926f163d01d48b3e1d11ccdf7f46942aef616dac910c58c48c49f56ccb3b2338259c6d61d

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,11 @@
+## 0.4.13 - 2019-03-20
+* [enhancement] Support clustered table as an experimental feature
+## 0.4.12 - 2019-03-20
+* [maintenance] Fix `time_partitioning.requirePartitionFilter` was not working. Use `time_partitioning.require_partition_filter` (thanks to @gitetsu)
 ## 0.4.11 - 2019-03-07
 * [maintenance] Fix to use `response.status.error_result` instead of `response.status.errors` to check job failure status (thanks to @nownabe)

data/README.md CHANGED Viewed

@@ -107,7 +107,9 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 |  time_partitioning.type           | string   | required  | nil     | The only type supported is DAY, which will generate one partition per day based on data loading time. |
 |  time_partitioning.expiration_ms  | int      | optional  | nil     | Number of milliseconds for which to keep the storage for a partition. |
 |  time_partitioning.field          | string   | optional  | nil     | `DATE` or `TIMESTAMP` column used for partitioning |
-|  time_partitioning.requirePartitionFilter | boolean      | optional  | nil     | If true, valid partition filter is required when query |
+|  time_partitioning.require_partition_filter | boolean      | optional  | nil     | If true, valid partition filter is required when query |
+|  clustering                       | hash     | optional  | nil     | (Experimental) Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. NOTE: **clustered tables** is a beta release. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
+|  clustering.fields                | array    | required  | nil     | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
 |  schema_update_options            | array    | optional  | nil     | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
 ### Example
@@ -466,7 +468,7 @@ $ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_example.rb
 ### Release gem:
-Fix gemspec, then
+Change the version of gemspec, and write CHANGELOG.md. Then,
 ```
 $ bundle exec rake release

data/embulk-output-bigquery.gemspec CHANGED Viewed

@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name          = "embulk-output-bigquery"
-  spec.version       = "0.4.11"
+  spec.version       = "0.4.13"
   spec.authors       = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary       = "Google BigQuery output plugin for Embulk"
   spec.description   = "Embulk plugin that insert records to Google BigQuery."

data/lib/embulk/output/bigquery.rb CHANGED Viewed

@@ -89,6 +89,7 @@ module Embulk
           'ignore_unknown_values'          => config.param('ignore_unknown_values',          :bool,    :default => false),
           'allow_quoted_newlines'          => config.param('allow_quoted_newlines',          :bool,    :default => false),
           'time_partitioning'              => config.param('time_partitioning',              :hash,    :default => nil),
+          'clustering'                     => config.param('clustering',                     :hash,    :default => nil), # google-api-ruby-client >= v0.21.0
           'schema_update_options'          => config.param('schema_update_options',          :array,   :default => nil),
           # for debug
@@ -234,6 +235,12 @@ module Embulk
           task['time_partitioning'] = {'type' => 'DAY'}
         end
+        if task['clustering']
+          unless task['clustering']['fields']
+            raise ConfigError.new "`clustering` must have `fields` key"
+          end
+        end
         if task['schema_update_options']
           task['schema_update_options'].each do |schema_update_option|
             unless %w[ALLOW_FIELD_ADDITION ALLOW_FIELD_RELAXATION].include?(schema_update_option)

data/lib/embulk/output/bigquery/bigquery_client.rb CHANGED Viewed

@@ -437,7 +437,14 @@ module Embulk
                 type: options['time_partitioning']['type'],
                 expiration_ms: options['time_partitioning']['expiration_ms'],
                 field: options['time_partitioning']['field'],
-                requirePartitionFilter: options['time_partitioning']['requirePartitionFilter'],
+                require_partition_filter: options['time_partitioning']['require_partition_filter'],
+              }
+            end
+            options['clustering'] ||= @task['clustering']
+            if options['clustering']
+              body[:clustering] = {
+                fields: options['clustering']['fields'],
               }
             end

data/test/test_configure.rb CHANGED Viewed

@@ -84,6 +84,7 @@ module Embulk
         assert_equal false, task['ignore_unknown_values']
         assert_equal false, task['allow_quoted_newlines']
         assert_equal nil, task['time_partitioning']
+        assert_equal nil, task['clustering']
         assert_equal false, task['skip_load']
       end
@@ -277,6 +278,14 @@ module Embulk
         assert_equal 'DAY', task['time_partitioning']['type']
       end
+      def test_clustering
+        config = least_config.merge('clustering' => {'fields' => ['field_a']})
+        assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+        config = least_config.merge('clustering' => {})
+        assert_raise { Bigquery.configure(config, schema, processor_count) }
+      end
       def test_schema_update_options
         config = least_config.merge('schema_update_options' => ['ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION'])
         assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.4.11
+  version: 0.4.13
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-03-06 00:00:00.000000000 Z
+date: 2019-03-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -161,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.14
+rubygems_version: 2.6.14.1
 signing_key:
 specification_version: 4
 summary: Google BigQuery output plugin for Embulk