embulk-output-bigquery 0.4.13 → 0.4.14
- checksums.yaml +5 -5
- data/.travis.yml +10 -6
- data/CHANGELOG.md +4 -0
- data/Gemfile +2 -0
- data/README.md +41 -6
- data/embulk-output-bigquery.gemspec +1 -1
- data/example/config_delete_in_advance_field_partitioned_table.yml +33 -0
- data/example/config_replace_backup_field_partitioned_table.yml +34 -0
- data/example/{config_replace_backup_paritioned_table.yml → config_replace_backup_partitioned_table.yml} +0 -0
- data/example/config_replace_field_partitioned_table.yml +33 -0
- data/example/{config_replace_paritioned_table.yml → config_replace_partitioned_table.yml} +0 -0
- data/lib/embulk/output/bigquery.rb +34 -23
- data/lib/embulk/output/bigquery/bigquery_client.rb +22 -20
- data/lib/embulk/output/bigquery/helper.rb +8 -4
- data/test/helper.rb +2 -1
- data/test/test_bigquery_client.rb +16 -16
- data/test/test_example.rb +5 -3
- data/test/test_transaction.rb +24 -24
- metadata +8 -7
- data/example/example.jsonl +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: 4fb376f288bfa86d632d727b3d0770ca4b94e364261c3f87a2569c801ee2fa00
+  data.tar.gz: 2571a07afb9aac0774e0744f9d5118712bb83f44f82470dd4fd25bf515c7b9fa
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 15f71decc69d34d8fbc3ee09452a6307107b71f759820b8a0521c6473b2231c4706febf216b59baae0e18fc3a06a056c18552d1093f0ac264ef84183a6d27992
+  data.tar.gz: 7ee57f82766927cb804bf0d88550f7f3e4d0459315160a0eec98ccd4c00e2a2423a093cffd17e836d2dba8461cbc2ae4e227ff85d60c7c9628d32b1fd142b7eb
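These digests let you verify an unpacked gem offline. A minimal sketch in Ruby, assuming the `.gem` archive has been unpacked (e.g. `tar xf embulk-output-bigquery-0.4.14.gem`) so that `checksums.yaml`, `metadata.gz`, and `data.tar.gz` sit in the current directory:

```ruby
require 'digest'
require 'yaml'

# Compare each payload's SHA256 against the value recorded in checksums.yaml.
sums = YAML.load(File.read('checksums.yaml'))
%w[metadata.gz data.tar.gz].each do |name|
  actual = Digest::SHA256.file(name).hexdigest
  puts "#{name}: #{actual == sums['SHA256'][name] ? 'OK' : 'MISMATCH'}"
end
```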
data/.travis.yml
CHANGED
@@ -1,17 +1,21 @@
 language: ruby
 matrix:
   include:
-    - env: EMBULK_VERSION=0.8.39
-      rvm: jruby-9.1.5.0 # bundled jruby version
-      jdk: openjdk7 # embulk 0.8.x uses jdk7
     - env: EMBULK_VERSION=0.9.15
-      rvm: jruby-9.1.
+      rvm: jruby-9.1.15.0 # bundled jruby version
       jdk: openjdk8 # embulk 0.9.x uses jdk8
     - env: EMBULK_VERSION=latest
-      rvm: jruby-9.1.
+      rvm: jruby-9.1.15.0 # ?
       jdk: openjdk8 # ?
   allow_failures:
     - env: EMBULK_VERSION=latest
 before_install:
   - curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-${EMBULK_VERSION}.jar"
-
+  - chmod a+x embulk.jar
+  - BUNDLER_VERSION=$(echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb | tail -n 2 | tr -d '"')
+  - gem uninstall bundler -x
+  - gem install bundler -v ${BUNDLER_VERSION}
+install:
+  - ./embulk.jar bundle install --jobs=3 --retry=3 --path vendor/bundle
+script:
+  - bundle exec env RUBYOPT="-r ./embulk.jar -r embulk -r embulk/java/bootstrap" rake test
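Note: the new `before_install` steps pin the host bundler to the exact version bundled inside embulk.jar; a mismatched bundler cannot resolve the `gem 'embulk'` dependency that lives in the jar. The `BUNDLER_VERSION=...` one-liner discovers that version by piping a tiny Ruby program into the jar's embedded irb; stripped of the `tail`/`tr` post-processing, what runs is just:

```ruby
# Evaluated inside `./embulk.jar irb`; the last expression's value is the
# bundler version shipped in the jar, which .travis.yml then installs.
require 'bundler'
Bundler::VERSION
```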
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -47,7 +47,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
 | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
 | auto_create_dataset | boolean | optional | false | automatically create dataset |
-| auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
+| auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
 | schema_file | string | optional | | /path/to/schema.json |
 | template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
 | prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
@@ -108,7 +108,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
 | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
 | time_partitioning.require_partition_filter | boolean | optional | nil | If true, valid partition filter is required when query |
-| clustering | hash | optional | nil |
+| clustering | hash | optional | nil | Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
 | clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
 | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
 
@@ -158,6 +158,8 @@ This is not transactional, i.e., if fails, the target table could have some rows
 
 ```is_skip_job_result_check``` must be false when replace mode
 
+NOTE: BigQuery does not support replacing (actually, copying into) a non-partitioned table with a partitioned table atomically. You must delete the non-partitioned table first; otherwise, you get an `Incompatible table partitioning specification when copying to the column partitioned table` error.
+
 ##### replace_backup
 
 1. Load to temporary table (Create and WRITE_APPEND in parallel)
@@ -250,9 +252,10 @@ out:
 
 ### Dynamic table creating
 
-
+This plugin tries to create a table using BigQuery API when
 
-
+* mode is either of `delete_in_advance`, `replace`, `replace_backup`, `append`.
+* mode is `append_direct` and `auto_create_table` is true.
 
 There are 3 ways to set schema.
 
@@ -370,7 +373,7 @@ out:
 
 ### GCS Bucket
 
-This is useful to reduce number of consumed jobs, which is limited by [
+This is useful to reduce number of consumed jobs, which is limited by [100,000 jobs per project per day](https://cloud.google.com/bigquery/quotas#load_jobs).
 
 This plugin originally loads local files into BigQuery in parallel, that is, consumes a number of jobs, say 24 jobs on 24 CPU core machine for example (this depends on embulk parameters such as `min_output_tasks` and `max_threads`).
 
@@ -448,8 +451,40 @@ $ embulk run -X page_size=1 -b . -l trace example/example.yml
 
 Place your embulk with `.jar` extension:
 
+
+```
+$ curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-latest.jar"
+$ chmod a+x embulk.jar
+```
+
+Investigate JRUBY\_VERSION and Bundler::VERSION included in the embulk.jar:
+
+```
+$ echo JRUBY_VERSION | ./embulk.jar irb
+2019-08-10 00:59:11.866 +0900: Embulk v0.9.17
+Switch to inspect mode.
+JRUBY_VERSION
+"X.X.X.X"
+
+$ echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb
+2019-08-10 01:59:10.460 +0900: Embulk v0.9.17
+Switch to inspect mode.
+require 'bundler'; Bundler::VERSION
+"Y.Y.Y"
+```
+
+Install the same version of jruby (change X.X.X.X to the version shown above) and bundler:
+
+```
+$ rbenv install jruby-X.X.X.X
+$ rbenv local jruby-X.X.X.X
+$ gem install bundler -v Y.Y.Y
+```
+
+Install dependencies (NOTE: Use bundler included in the embulk.jar, otherwise, `gem 'embulk'` is not found):
+
 ```
-$
+$ ./embulk.jar bundle install --path vendor/bundle
 ```
 
 Run tests with `env RUBYOPT="-r ./embulk.jar`:
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.4.13"
+  spec.version = "0.4.14"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
data/example/config_delete_in_advance_field_partitioned_table.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: delete_in_advance
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_field_partitioned_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    field: timestamp
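Note: with `time_partitioning.field` set, `auto_create_table: true` creates a column-partitioned (field-partitioned) table rather than an ingestion-time-partitioned one. A sketch of the equivalent raw API call via google-api-ruby-client, which this plugin builds on (illustrative only; project and dataset ids are placeholders):

```ruby
require 'google/apis/bigquery_v2'
require 'googleauth'

service = Google::Apis::BigqueryV2::BigqueryService.new
service.authorization = Google::Auth.get_application_default(
  ['https://www.googleapis.com/auth/bigquery']
)

table = Google::Apis::BigqueryV2::Table.new(
  table_reference: Google::Apis::BigqueryV2::TableReference.new(
    project_id: 'your-project',
    dataset_id: 'your_dataset_name',
    table_id: 'your_field_partitioned_table_name'
  ),
  # Partition on the DATE/TIMESTAMP column instead of ingestion time.
  time_partitioning: Google::Apis::BigqueryV2::TimePartitioning.new(
    type: 'DAY', field: 'timestamp'
  )
)
service.insert_table('your-project', 'your_dataset_name', table)
```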
data/example/config_replace_backup_field_partitioned_table.yml
ADDED
@@ -0,0 +1,34 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace_backup
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_field_partitioned_table_name
+  table_old: your_field_partitioned_table_name_old
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    field: 'timestamp'
data/example/{config_replace_backup_paritioned_table.yml → config_replace_backup_partitioned_table.yml}
RENAMED
File without changes
data/example/config_replace_field_partitioned_table.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_field_partitioned_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    field: 'timestamp'
data/example/{config_replace_paritioned_table.yml → config_replace_partitioned_table.yml}
RENAMED
File without changes
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -64,7 +64,7 @@ module Embulk
           'default_timestamp_format' => config.param('default_timestamp_format', :string, :default => ValueConverterFactory::DEFAULT_TIMESTAMP_FORMAT),
           'payload_column' => config.param('payload_column', :string, :default => nil),
           'payload_column_index' => config.param('payload_column_index', :integer, :default => nil),
-
+
           'open_timeout_sec' => config.param('open_timeout_sec', :integer, :default => nil),
           'timeout_sec' => config.param('timeout_sec', :integer, :default => nil), # google-api-ruby-client < v0.11.0
           'send_timeout_sec' => config.param('send_timeout_sec', :integer, :default => nil), # google-api-ruby-client >= v0.11.0
@@ -276,7 +276,7 @@ module Embulk
           sum + (response ? response.statistics.load.output_rows.to_i : 0)
         end
         if task['temp_table']
-          num_output_rows = bigquery.
+          num_output_rows = bigquery.get_table_or_partition(task['temp_table']).num_rows.to_i
         else
           num_output_rows = num_response_rows
         end
@@ -306,37 +306,48 @@ module Embulk
 
         case task['mode']
         when 'delete_in_advance'
-
-
-
-
+          bigquery.delete_partition(task['table'])
+          bigquery.create_table_if_not_exists(task['table'])
+        when 'replace'
+          bigquery.create_table_if_not_exists(task['temp_table'])
+          if Helper.has_partition_decorator?(task['table'])
+            if task['auto_create_table']
+              bigquery.create_table_if_not_exists(task['table'])
+            else
+              bigquery.get_table(task['table']) # raises NotFoundError
+            end
           end
-
-
-
-          if task['time_partitioning']
+        when 'append'
+          bigquery.create_table_if_not_exists(task['temp_table'])
+          if Helper.has_partition_decorator?(task['table'])
            if task['auto_create_table']
-              bigquery.
+              bigquery.create_table_if_not_exists(task['table'])
            else
              bigquery.get_table(task['table']) # raises NotFoundError
            end
          end
-
-
-
-
-
+        when 'replace_backup'
+          bigquery.create_table_if_not_exists(task['temp_table'])
+          if Helper.has_partition_decorator?(task['table'])
+            if task['auto_create_table']
+              bigquery.create_table_if_not_exists(task['table'])
+            else
+              bigquery.get_table(task['table']) # raises NotFoundError
+            end
          end
-
-
-          if task['mode'] == 'replace_backup'
-            if task['time_partitioning'] and Helper.has_partition_decorator?(task['table_old'])
+          if Helper.has_partition_decorator?(task['table_old'])
            if task['auto_create_table']
-              bigquery.
+              bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old'])
            else
              bigquery.get_table(task['table_old'], dataset: task['dataset_old']) # raises NotFoundError
            end
          end
+        else # append_direct
+          if task['auto_create_table']
+            bigquery.create_table_if_not_exists(task['table'])
+          else
+            bigquery.get_table(task['table']) # raises NotFoundError
+          end
         end
       end
 
@@ -403,7 +414,7 @@ module Embulk
 
         if task['mode'] == 'replace_backup'
           begin
-            bigquery.
+            bigquery.get_table_or_partition(task['table'])
             bigquery.copy(task['table'], task['table_old'], task['dataset_old'])
           rescue NotFoundError
           end
@@ -515,7 +526,7 @@ module Embulk
 
         self.class.rehearsal_thread = Thread.new do
           begin
-            bigquery.
+            bigquery.create_table_if_not_exists(task['rehearsal_table'])
             response = bigquery.load(rehearsal_path, task['rehearsal_table'])
             num_output_rows = response ? response.statistics.load.output_rows.to_i : 0
             Embulk.logger.info { "embulk-output-bigquery: Loaded rehearsal #{num_output_rows}" }
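Note: the rewritten `case` spells the pre-load behavior out per mode: `delete_in_advance` now deletes the partition and recreates the table, every copy-based mode (`replace`, `append`, `replace_backup`) creates its temp table unconditionally, and a target carrying a partition decorator is created only when `auto_create_table` is set (otherwise `get_table` fails fast with `NotFoundError`). The check repeated in each branch boils down to this (a distilled sketch, not the plugin's literal code):

```ruby
# Ensure a decorated target such as "table$20160929" exists before loading.
def ensure_target_table!(bigquery, task)
  return unless Helper.has_partition_decorator?(task['table'])
  if task['auto_create_table']
    bigquery.create_table_if_not_exists(task['table'])
  else
    bigquery.get_table(task['table']) # raises NotFoundError if missing
  end
end
```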
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -116,11 +116,11 @@ module Embulk
       if @location
         body[:job_reference][:location] = @location
       end
-
+
       if @task['schema_update_options']
         body[:configuration][:load][:schema_update_options] = @task['schema_update_options']
       end
-
+
       opts = {}
 
       Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
@@ -412,7 +412,7 @@ module Embulk
       end
     end
 
-    def
+    def create_table_if_not_exists(table, dataset: nil, options: nil)
       begin
         dataset ||= @dataset
         options ||= {}
@@ -466,8 +466,17 @@ module Embulk
     end
 
     def delete_table(table, dataset: nil)
+      table = Helper.chomp_partition_decorator(table)
+      delete_table_or_partition(table, dataset: dataset)
+    end
+
+    def delete_partition(table, dataset: nil)
+      delete_table_or_partition(table, dataset: dataset)
+    end
+
+    # if `table` with a partition decorator is given, a partition is deleted.
+    def delete_table_or_partition(table, dataset: nil)
       begin
-        table = Helper.chomp_partition_decorator(table)
         dataset ||= @dataset
         Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@project}:#{dataset}.#{table}" }
         with_network_retry { client.delete_table(@project, dataset, table) }
@@ -486,8 +495,16 @@ module Embulk
     end
 
     def get_table(table, dataset: nil)
+      table = Helper.chomp_partition_decorator(table)
+      get_table_or_partition(table)
+    end
+
+    def get_partition(table, dataset: nil)
+      get_table_or_partition(table)
+    end
+
+    def get_table_or_partition(table, dataset: nil)
       begin
-        table = Helper.chomp_partition_decorator(table)
         dataset ||= @dataset
         Embulk.logger.info { "embulk-output-bigquery: Get table... #{@project}:#{dataset}.#{table}" }
         with_network_retry { client.get_table(@project, dataset, table) }
@@ -503,21 +520,6 @@ module Embulk
         raise Error, "failed to get table #{@project}:#{dataset}.#{table}, response:#{response}"
       end
     end
-
-    # Is this only a way to drop partition?
-    def delete_partition(table_with_partition, dataset: nil)
-      dataset ||= @dataset
-      begin
-        table = Helper.chomp_partition_decorator(table_with_partition)
-        get_table(table, dataset: dataset)
-      rescue NotFoundError
-      else
-        Embulk.logger.info { "embulk-output-bigquery: Delete partition... #{@project}:#{dataset}.#{table_with_partition}" }
-        Tempfile.create('embulk_output_bigquery_empty_file_') do |fp|
-          load(fp.path, table_with_partition, write_disposition: 'WRITE_TRUNCATE')
-        end
-      end
-    end
     end
   end
 end
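Note: this removes the old workaround of truncating a partition by loading an empty file with `WRITE_TRUNCATE`. `delete_partition` now goes through `tables.delete` directly, relying on BigQuery accepting a table id with a partition decorator there, while `delete_table` chomps the decorator first so it always drops the whole table. A standalone sketch of the underlying call (assuming google-api-ruby-client with application-default credentials):

```ruby
require 'google/apis/bigquery_v2'
require 'googleauth'

service = Google::Apis::BigqueryV2::BigqueryService.new
service.authorization = Google::Auth.get_application_default(
  ['https://www.googleapis.com/auth/bigquery']
)

# With the "$20160929" decorator this removes just that partition;
# without it, the whole table is deleted.
service.delete_table('your-project', 'your_dataset', 'your_table$20160929')
```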
data/lib/embulk/output/bigquery/helper.rb
CHANGED
@@ -7,12 +7,16 @@ module Embulk
     class Helper
       PARTITION_DECORATOR_REGEXP = /\$.+\z/
 
-      def self.
-
+      def self.field_partitioning?(task)
+        (task['time_partitioning'] || {}).key?('field')
       end
 
-      def self.
-
+      def self.has_partition_decorator?(table_name)
+        !!(table_name =~ PARTITION_DECORATOR_REGEXP)
+      end
+
+      def self.chomp_partition_decorator(table_name)
+        table_name.sub(PARTITION_DECORATOR_REGEXP, '')
       end
 
       def self.bq_type_from_embulk_type(embulk_type)
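Note: the decorator helpers are pure string operations on `PARTITION_DECORATOR_REGEXP`, so their behavior is easy to pin down in isolation. A self-contained sketch mirroring the methods above:

```ruby
PARTITION_DECORATOR_REGEXP = /\$.+\z/

def has_partition_decorator?(table_name)
  !!(table_name =~ PARTITION_DECORATOR_REGEXP)
end

def chomp_partition_decorator(table_name)
  table_name.sub(PARTITION_DECORATOR_REGEXP, '')
end

p has_partition_decorator?('table_name$20160929')  # => true
p has_partition_decorator?('table_name')           # => false
p chomp_partition_decorator('table_name$20160929') # => "table_name"
```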
data/test/helper.rb
CHANGED
@@ -1,5 +1,6 @@
 #!/usr/bin/env ruby
 
+require 'bundler/setup'
 require 'test/unit'
 require 'test/unit/rr'
 
@@ -7,7 +8,7 @@ require 'embulk'
 begin
   # Embulk ~> 0.8.x
   Embulk.setup
-rescue NotImplementedError
+rescue NotImplementedError, NoMethodError, NameError
   # Embulk ~> 0.9.x
   require 'embulk/java/bootstrap'
 end
data/test/test_bigquery_client.rb
CHANGED
@@ -96,20 +96,20 @@ else
       end
     end
 
-    sub_test_case "
-      def
+    sub_test_case "create_table_if_not_exists" do
+      def test_create_table_if_not_exists
         client.delete_table('your_table_name')
-        assert_nothing_raised { client.
+        assert_nothing_raised { client.create_table_if_not_exists('your_table_name') }
       end
 
-      def
-        assert_nothing_raised { client.
+      def test_create_table_if_not_exists_already_exists
+        assert_nothing_raised { client.create_table_if_not_exists('your_table_name') }
       end
 
       def test_create_partitioned_table
         client.delete_table('your_table_name')
         assert_nothing_raised do
-          client.
+          client.create_table_if_not_exists('your_table_name$20160929', options:{
             'time_partitioning' => {'type'=>'DAY', 'expiration_ms'=>1000}
           })
         end
@@ -118,7 +118,7 @@ else
 
     sub_test_case "delete_table" do
       def test_delete_table
-        client.
+        client.create_table_if_not_exists('your_table_name')
         assert_nothing_raised { client.delete_table('your_table_name') }
       end
 
@@ -127,14 +127,14 @@ else
       end
 
       def test_delete_partitioned_table
-        client.
+        client.create_table_if_not_exists('your_table_name')
         assert_nothing_raised { client.delete_table('your_table_name$20160929') }
       end
     end
 
     sub_test_case "get_table" do
       def test_get_table
-        client.
+        client.create_table_if_not_exists('your_table_name')
         assert_nothing_raised { client.get_table('your_table_name') }
       end
 
@@ -146,7 +146,7 @@ else
       end
 
       def test_get_partitioned_table
-        client.
+        client.create_table_if_not_exists('your_table_name')
         assert_nothing_raised { client.get_table('your_table_name$20160929') }
       end
     end
@@ -154,7 +154,7 @@ else
     sub_test_case "delete_partition" do
       def test_delete_partition
         client.delete_table('your_table_name')
-        client.
+        client.create_table_if_not_exists('your_table_name$20160929')
         assert_nothing_raised { client.delete_partition('your_table_name$20160929') }
       ensure
         client.delete_table('your_table_name')
@@ -162,7 +162,7 @@ else
 
       def test_delete_partition_of_non_partitioned_table
         client.delete_table('your_table_name')
-        client.
+        client.create_table_if_not_exists('your_table_name')
         assert_raise { client.delete_partition('your_table_name$20160929') }
       ensure
         client.delete_table('your_table_name')
@@ -175,7 +175,7 @@ else
 
     sub_test_case "fields" do
       def test_fields_from_table
-        client.
+        client.create_table_if_not_exists('your_table_name')
         fields = client.fields_from_table('your_table_name')
         expected = [
           {:type=>"BOOLEAN", :name=>"boolean"},
@@ -190,15 +190,15 @@ else
     end
 
     sub_test_case "copy" do
-      def
-        client.
+      def test_create_table_if_not_exists
+        client.create_table_if_not_exists('your_table_name')
         assert_nothing_raised { client.copy('your_table_name', 'your_table_name_old') }
       end
     end
 
     sub_test_case "load" do
       def test_load
-        client.
+        client.create_table_if_not_exists('your_table_name')
         File.write("tmp/your_file_name.csv", record.to_csv)
         assert_nothing_raised { client.load("/tmp/your_file_name.csv", 'your_table_name') }
       end
data/test/test_example.rb
CHANGED
@@ -9,8 +9,10 @@ unless File.exist?(JSON_KEYFILE)
 else
   class TestExample < Test::Unit::TestCase
     def embulk_path
-      if File.exist?("#{ENV['
-        "#{ENV['
+      if File.exist?("#{ENV['HOME']}/.embulk/bin/embulk")
+        "#{ENV['HOME']}/.embulk/bin/embulk"
+      elsif File.exist?("#{ENV['PWD']}/embulk.jar")
+        "#{ENV['PWD']}/embulk.jar"
       elsif File.exist?("/usr/local/bin/embulk")
         "/usr/local/bin/embulk"
       else
@@ -19,7 +21,7 @@ else
     end
 
     def embulk_run(config_path)
-      Bundler.with_clean_env do
+      ::Bundler.with_clean_env do
         cmd = "#{embulk_path} run -X page_size=1 -b . -l trace #{config_path}"
         puts "=" * 64
         puts cmd
data/test/test_transaction.rb
CHANGED
@@ -55,7 +55,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).create_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['table'])
         end
         Bigquery.transaction(config, schema, processor_count, &control)
       end
@@ -74,7 +74,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).create_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['table'])
         end
         Bigquery.transaction(config, schema, processor_count, &control)
       end
@@ -86,19 +86,19 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
-          mock(obj).
+          mock(obj).delete_partition(config['table'])
+          mock(obj).create_table_if_not_exists(config['table'])
         end
         Bigquery.transaction(config, schema, processor_count, &control)
       end
 
       def test_delete_in_advance_with_partitioning
-        config = least_config.merge('mode' => 'delete_in_advance', 'table' => 'table$20160929')
+        config = least_config.merge('mode' => 'delete_in_advance', 'table' => 'table$20160929', 'auto_create_table' => true)
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).delete_partition(config['table'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['table'])
         end
         Bigquery.transaction(config, schema, processor_count, &control)
       end
@@ -110,7 +110,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
           mock(obj).delete_table(config['temp_table'])
         end
@@ -122,7 +122,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
           mock(obj).get_table(config['table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
           mock(obj).delete_table(config['temp_table'])
@@ -135,8 +135,8 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
           mock(obj).delete_table(config['temp_table'])
         end
@@ -151,9 +151,9 @@ module Embulk
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_dataset(config['dataset_old'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
 
-          mock(obj).
+          mock(obj).get_table_or_partition(task['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -168,9 +168,9 @@ module Embulk
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).create_dataset(config['dataset'])
           mock(obj).create_dataset(config['dataset_old'], reference: config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
 
-          mock(obj).
+          mock(obj).get_table_or_partition(task['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -185,11 +185,11 @@ module Embulk
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_dataset(config['dataset_old'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
           mock(obj).get_table(task['table'])
           mock(obj).get_table(task['table_old'], dataset: config['dataset_old'])
 
-          mock(obj).
+          mock(obj).get_table_or_partition(task['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -204,11 +204,11 @@ module Embulk
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_dataset(config['dataset_old'])
-          mock(obj).
-          mock(obj).
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(task['table'])
+          mock(obj).create_table_if_not_exists(task['table_old'], dataset: config['dataset_old'])
 
-          mock(obj).
+          mock(obj).get_table_or_partition(task['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -224,7 +224,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
           mock(obj).delete_table(config['temp_table'])
         end
@@ -236,7 +236,7 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
           mock(obj).get_table(config['table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
           mock(obj).delete_table(config['temp_table'])
@@ -249,8 +249,8 @@ module Embulk
         task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
-          mock(obj).
-          mock(obj).
+          mock(obj).create_table_if_not_exists(config['temp_table'])
+          mock(obj).create_table_if_not_exists(config['table'])
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
           mock(obj).delete_table(config['temp_table'])
         end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.4.13
+  version: 0.4.14
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-
+date: 2019-08-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -87,6 +87,7 @@ files:
 - example/config_client_options.yml
 - example/config_csv.yml
 - example/config_delete_in_advance.yml
+- example/config_delete_in_advance_field_partitioned_table.yml
 - example/config_delete_in_advance_partitioned_table.yml
 - example/config_expose_errors.yml
 - example/config_gcs.yml
@@ -105,8 +106,10 @@ files:
 - example/config_progress_log_interval.yml
 - example/config_replace.yml
 - example/config_replace_backup.yml
-- example/config_replace_backup_paritioned_table.yml
-- example/config_replace_paritioned_table.yml
+- example/config_replace_backup_field_partitioned_table.yml
+- example/config_replace_backup_partitioned_table.yml
+- example/config_replace_field_partitioned_table.yml
+- example/config_replace_partitioned_table.yml
 - example/config_replace_schema_update_options.yml
 - example/config_skip_file_generation.yml
 - example/config_table_strftime.yml
@@ -114,7 +117,6 @@ files:
 - example/config_uncompressed.yml
 - example/config_with_rehearsal.yml
 - example/example.csv
-- example/example.jsonl
 - example/example.yml
 - example/example2_1.csv
 - example/example2_2.csv
@@ -160,8 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-
-rubygems_version: 2.6.14.1
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Google BigQuery output plugin for Embulk
data/example/example.jsonl
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":90,"string":"l6lTsvxd","double":903.4,"boolean":true}
|
2
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":91,"string":"XoALSEQg","double":394.5,"boolean":true}
|
3
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":92,"string":"0hgDRI_m","double":810.9,"boolean":true}
|
4
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":93,"string":"KjCRAc-A","double":477.4,"boolean":true}
|
5
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":94,"string":"fyQVGlT8","double":725.3,"boolean":true}
|
6
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":95,"string":"FpBYRPWK","double":316.6,"boolean":true}
|
7
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":96,"string":"9ikvnUqp","double":369.5,"boolean":true}
|
8
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":97,"string":"RRNYDAzK","double":506.5,"boolean":true}
|
9
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":90,"string":"l6lTsvxd","double":903.4,"boolean":false}
|
10
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":91,"string":"XoALSEQg","double":394.5,"boolean":false}
|
11
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":92,"string":"0hgDRI_m","double":810.9,"boolean":false}
|
12
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":93,"string":"KjCRAc-A","double":477.4,"boolean":false}
|
13
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":94,"string":"fyQVGlT8","double":725.3,"boolean":false}
|
14
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":95,"string":"FpBYRPWK","double":316.6,"boolean":false}
|
15
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":96,"string":"9ikvnUqp","double":369.5,"boolean":false}
|
16
|
-
{"date":"2015-07-13","timestamp":"2015-07-13 00:00:00.100000","null":null,"long":97,"string":"RRNYDAzK","double":506.5,"boolean":false}
|