embulk-output-bigquery 0.3.3 → 0.3.4
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +28 -7
- data/embulk-output-bigquery.gemspec +1 -1
- data/example/config_gcs.yml +32 -0
- data/lib/embulk/output/bigquery.rb +21 -2
- data/lib/embulk/output/bigquery/bigquery_client.rb +65 -63
- data/lib/embulk/output/bigquery/gcs_client.rb +112 -0
- data/lib/embulk/output/bigquery/google_client.rb +68 -0
- data/test/test_transaction.rb +0 -7
- metadata +5 -2
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f68ceb57a4eff6886157c585425526389623d0a2
+  data.tar.gz: b44323059a3057bb5de7fdd7b00d61ce970f3386
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5cc7b1245bda2ae8c5d581c67a09ce0685c7812658c3c47e195362290fd50c13abfb7a3e9bb2360bc01a6d6aa82009ce190bef667cfb1df2cddaeb653c162c14
+  data.tar.gz: 4f8611f292a61750568c7b15e7ae6f83bc83d09ae3f64b2359b8f6f4e4d4b7ac115e09e6e8fbb5cc2e98b89103b8d1aba0640e0d89b035dbab2e5feea0d47449
```
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
````diff
@@ -37,7 +37,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
 
 | name | type | required? | default | description |
 |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
-| mode | string | optional | "append" |
+| mode | string | optional | "append" | See [Mode](#mode) |
 | auth_method | string | optional | "private_key" | `private_key` , `json_key` or `compute_engine`
 | service_account_email | string | required when auth_method is private_key | | Your Google service account email
 | p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
@@ -46,21 +46,23 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
 | dataset | string | required | | dataset |
 | table | string | required | | table name |
 | auto_create_dataset | boolean | optional | false | automatically create dataset |
-| auto_create_table | boolean | optional | false |
+| auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
 | schema_file | string | optional | | /path/to/schema.json |
-| template_table | string | optional | | template table name
-| prevent_duplicate_insert | boolean | optional | false |
+| template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
+| prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
 | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
 | job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
 | is_skip_job_result_check | boolean | optional | false | Skip waiting until the load job finishes. Available for append, or delete_in_advance mode |
 | with_rehearsal | boolean | optional | false | Load `rehearsal_counts` records as a rehearsal. A rehearsal loads into a REHEARSAL temporary table and finally deletes it. You may use this option to catch data errors at as early a stage as possible |
 | rehearsal_counts | integer | optional | 1000 | Specify the number of records to load in a rehearsal |
 | abort_on_error | boolean | optional | true if max_bad_records is 0, otherwise false | Raise an error if the number of input rows and the number of output rows do not match |
-| column_options | hash | optional | |
+| column_options | hash | optional | | See [Column Options](#column-options) |
 | default_timezone | string | optional | UTC | |
 | default_timestamp_format | string | optional | %Y-%m-%d %H:%M:%S.%6N | |
-| payload_column | string | optional | nil |
-| payload_column_index | integer | optional | nil |
+| payload_column | string | optional | nil | See [Formatter Performance Issue](#formatter-performance-issue) |
+| payload_column_index | integer | optional | nil | See [Formatter Performance Issue](#formatter-performance-issue) |
+| gcs_bucket | string | optional | nil | See [GCS Bucket](#gcs-bucket) |
+| auto_create_gcs_bucket | boolean | optional | false | See [GCS Bucket](#gcs-bucket) |
 
 Client or request options
 
@@ -345,6 +347,25 @@ out:
   prevent_duplicate_insert: true
 ```
 
+### GCS Bucket
+
+This feature is useful to reduce the number of consumed jobs, which is limited to [10,000 jobs per project per day](https://cloud.google.com/bigquery/quota-policy#import).
+
+This plugin normally loads local files into BigQuery in parallel, that is, it consumes a number of jobs, say 24 jobs on a 24-CPU-core machine for example (this depends on embulk parameters such as `min_output_tasks` and `max_threads`).
+
+BigQuery supports loading multiple files from GCS with one job (but not from local files, sigh), therefore, uploading local files to GCS and then loading from GCS into BigQuery reduces the number of consumed jobs.
+
+Using the `gcs_bucket` option enables this strategy. You may also use `auto_create_gcs_bucket` to create the specified GCS bucket automatically.
+
+```yaml
+out:
+  type: bigquery
+  gcs_bucket: bucket_name
+  auto_create_gcs_bucket: false
+```
+
+ToDo: Use https://cloud.google.com/storage/docs/streaming if google-api-ruby-client supports streaming transfers into GCS.
+
 ## Development
 
 ### Run example:
````
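As a back-of-the-envelope illustration of the job savings the new GCS Bucket section describes, here is a minimal Ruby sketch; the file count and names are assumptions for illustration, not taken from the plugin:

```ruby
# Hypothetical run: 24 embulk output tasks each wrote one local file.
paths = (0...24).map { |i| "/tmp/embulk_output_task_#{i}.csv.gz" }

jobs_direct  = paths.size # direct loading consumes one BigQuery load job per file
jobs_via_gcs = 1          # one load job can list many gs:// source URIs

puts "direct: #{jobs_direct} jobs, via gcs_bucket: #{jobs_via_gcs} job"
```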
data/embulk-output-bigquery.gemspec
CHANGED
```diff
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.3.3"
+  spec.version = "0.3.4"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
```
data/example/config_gcs.yml
ADDED
```yaml
in:
  type: file
  path_prefix: example/example.csv
  parser:
    type: csv
    charset: UTF-8
    newline: CRLF
    null_string: 'NULL'
    skip_header_lines: 1
    comment_line_marker: '#'
    columns:
    - {name: date, type: string}
    - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
    - {name: "null", type: string}
    - {name: long, type: long}
    - {name: string, type: string}
    - {name: double, type: double}
    - {name: boolean, type: boolean}
out:
  type: bigquery
  mode: replace
  auth_method: json_key
  json_keyfile: example/your-project-000.json
  dataset: your_dataset_name
  table: your_table_name
  source_format: NEWLINE_DELIMITED_JSON
  compression: GZIP
  auto_create_dataset: true
  auto_create_table: true
  schema_file: example/schema.json
  gcs_bucket: your_bucket_name
  auto_create_gcs_bucket: true
```
data/lib/embulk/output/bigquery.rb
CHANGED
```diff
@@ -1,8 +1,10 @@
+require 'uri'
 require 'json'
 require 'tempfile'
 require 'fileutils'
 require 'securerandom'
 require_relative 'bigquery/bigquery_client'
+require_relative 'bigquery/gcs_client'
 require_relative 'bigquery/file_writer'
 require_relative 'bigquery/value_converter_factory'
 
@@ -73,6 +75,9 @@ module Embulk
           'skip_file_generation' => config.param('skip_file_generation', :bool, :default => false),
           'compression' => config.param('compression', :string, :default => 'NONE'),
 
+          'gcs_bucket' => config.param('gcs_bucket', :string, :default => nil),
+          'auto_create_gcs_bucket' => config.param('auto_create_gcs_bucket', :bool, :default => false),
+
           'source_format' => config.param('source_format', :string, :default => 'CSV'),
           'max_bad_records' => config.param('max_bad_records', :integer, :default => 0),
           'field_delimiter' => config.param('field_delimiter', :string, :default => ','),
@@ -312,8 +317,22 @@ module Embulk
         if task['skip_load'] # only for debug
           Embulk.logger.info { "embulk-output-bigquery: Skip load" }
         else
-          target_table = task['temp_table'] ? task['temp_table'] : task['table']
-          responses = bigquery.load_in_parallel(paths, target_table)
+          if !paths.empty?
+            target_table = task['temp_table'] ? task['temp_table'] : task['table']
+            if bucket = task['gcs_bucket']
+              gcs = GcsClient.new(task)
+              gcs.insert_bucket(bucket) if task['auto_create_gcs_bucket']
+              objects = paths.size.times.map { SecureRandom.uuid.to_s }
+              gcs.insert_objects(paths, objects: objects, bucket: bucket)
+              object_uris = objects.map {|object| URI.join("gs://#{bucket}", object).to_s }
+              responses = bigquery.load_from_gcs(object_uris, target_table)
+              objects.each {|object| gcs.delete_object(object, bucket: bucket) }
+            else
+              responses = bigquery.load_in_parallel(paths, target_table)
+            end
+          else
+            responses = []
+          end
         end
         transaction_report = self.transaction_report(task, responses)
         Embulk.logger.info { "embulk-output-bigquery: transaction_report: #{transaction_report.to_json}" }
```
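The staging branch above names each uploaded object with a random UUID and derives the gs:// URI via `URI.join`. A standalone sketch of that derivation, with an assumed bucket and file names (the plugin takes them from `task['gcs_bucket']` and the files it wrote):

```ruby
require 'uri'
require 'securerandom'

bucket = 'my-staging-bucket' # assumed; the plugin reads task['gcs_bucket']
paths  = ['/tmp/embulk_output_0.csv.gz', '/tmp/embulk_output_1.csv.gz']

# One random object name per local file, mirroring the diff above
objects     = paths.size.times.map { SecureRandom.uuid.to_s }
object_uris = objects.map { |object| URI.join("gs://#{bucket}", object).to_s }

puts object_uris # e.g. gs://my-staging-bucket/123e4567-e89b-12d3-a456-426655440000
```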
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
```diff
@@ -1,76 +1,22 @@
 require 'google/apis/bigquery_v2'
-require 'google/api_client/auth/key_utils'
 require 'json'
 require 'thwait'
+require_relative 'google_client'
 require_relative 'helper'
 
 module Embulk
   module Output
     class Bigquery < OutputPlugin
-      class Error < StandardError; end
-      class JobTimeoutError < Error; end
-      class NotFoundError < Error; end
-
-      class BigqueryClient
+      class BigqueryClient < GoogleClient
         def initialize(task, schema, fields = nil)
-          @task = task
-          @schema = schema
-
-          @project = task['project']
-          @dataset = task['dataset']
-
-          reset_fields(fields) if fields
-        end
-
-        def client
-          return @cached_client if @cached_client && @cached_client_expiration > Time.now
-
-          client = Google::Apis::BigqueryV2::BigqueryService.new
-          client.client_options.application_name = @task['application_name']
-          client.request_options.retries = @task['retries']
-          client.request_options.timeout_sec = @task['timeout_sec']
-          client.request_options.open_timeout_sec = @task['open_timeout_sec']
-          Embulk.logger.debug { "embulk-output-bigquery: client_options: #{client.client_options.to_h}" }
-          Embulk.logger.debug { "embulk-output-bigquery: request_options: #{client.request_options.to_h}" }
-
           scope = "https://www.googleapis.com/auth/bigquery"
+          client_class = Google::Apis::BigqueryV2::BigqueryService
+          super(task, scope, client_class)
 
-          case @task['auth_method']
-          when 'private_key'
-            private_key_passphrase = 'notasecret'
-            key = Google::APIClient::KeyUtils.load_from_pkcs12(@task['p12_keyfile'], private_key_passphrase)
-            auth = Signet::OAuth2::Client.new(
-              token_credential_uri: "https://accounts.google.com/o/oauth2/token",
-              audience: "https://accounts.google.com/o/oauth2/token",
-              scope: scope,
-              issuer: @task['service_account_email'],
-              signing_key: key)
-
-          when 'compute_engine'
-            auth = Google::Auth::GCECredentials.new
-
-          when 'json_key'
-            json_key = @task['json_keyfile']
-            if File.exist?(json_key)
-              auth = File.open(json_key) do |f|
-                Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: scope)
-              end
-            else
-              key = StringIO.new(json_key)
-              auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
-            end
-
-          when 'application_default'
-            auth = Google::Auth.get_application_default([scope])
-
-          else
-            raise ConfigError, "Unknown auth method: #{@task['auth_method']}"
-          end
-
-          client.authorization = auth
-
-          @cached_client_expiration = Time.now + 1800
-          @cached_client = client
+          @schema = schema
+          reset_fields(fields) if fields
+          @project = @task['project']
+          @dataset = @task['dataset']
         end
 
         def fields
@@ -94,6 +40,62 @@ module Embulk
           self.fields
         end
 
+        # @param gcs_paths [Array] array of gcs paths such as gs://bucket/path
+        # @return [Array] responses
+        def load_from_gcs(object_uris, table)
+          begin
+            # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
+            # we should generate job_id in client code, otherwise, retrying would cause duplication
+            if @task['prevent_duplicate_insert'] and (@task['mode'] == 'append' or @task['mode'] == 'append_direct')
+              job_id = Helper.create_load_job_id(@task, path, fields)
+            else
+              job_id = "embulk_load_job_#{SecureRandom.uuid}"
+            end
+            Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table}" }
+
+            body = {
+              job_reference: {
+                project_id: @project,
+                job_id: job_id,
+              },
+              configuration: {
+                load: {
+                  destination_table: {
+                    project_id: @project,
+                    dataset_id: @dataset,
+                    table_id: table,
+                  },
+                  schema: {
+                    fields: fields,
+                  },
+                  write_disposition: 'WRITE_APPEND',
+                  source_format: @task['source_format'],
+                  max_bad_records: @task['max_bad_records'],
+                  field_delimiter: @task['source_format'] == 'CSV' ? @task['field_delimiter'] : nil,
+                  encoding: @task['encoding'],
+                  ignore_unknown_values: @task['ignore_unknown_values'],
+                  allow_quoted_newlines: @task['allow_quoted_newlines'],
+                  source_uris: object_uris,
+                }
+              }
+            }
+            opts = {}
+
+            Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
+            response = client.insert_job(@project, body, opts)
+            unless @task['is_skip_job_result_check']
+              response = wait_load('Load', response)
+            end
+            [response]
+          rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+            response = {status_code: e.status_code, message: e.message, error_class: e.class}
+            Embulk.logger.error {
+              "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
+            }
+            raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table}, response:#{response}"
+          end
+        end
+
         def load_in_parallel(paths, table)
           return [] if paths.empty?
           # You may think as, load job is a background job, so sending requests in parallel
@@ -118,7 +120,7 @@ module Embulk
           end
           ThreadsWait.all_waits(*threads) do |th|
             idx, response = th.value # raise errors occurred in threads
-            responses[idx] = response
+            responses[idx] = response if idx
           end
           responses
         end
```
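The job_id branch in `load_from_gcs` follows the BigQuery advice cited in its comment: a client-generated, deterministic id makes a retried `insert_job` fail as a duplicate instead of loading the data twice. A rough sketch of that policy; the digest inputs here are an assumption for illustration, while the plugin itself delegates to `Helper.create_load_job_id`:

```ruby
require 'securerandom'
require 'digest/md5'

# Sketch only: deterministic id when deduplication is requested, random otherwise.
def sketch_load_job_id(task, object_uris)
  if task['prevent_duplicate_insert'] && %w[append append_direct].include?(task['mode'])
    # Assumed digest inputs; the real code uses Helper.create_load_job_id
    "embulk_load_job_#{Digest::MD5.hexdigest([task['dataset'], task['table'], object_uris].inspect)}"
  else
    "embulk_load_job_#{SecureRandom.uuid}"
  end
end

task = { 'mode' => 'append', 'prevent_duplicate_insert' => true,
         'dataset' => 'ds', 'table' => 't' }
puts sketch_load_job_id(task, ['gs://bucket/object']) # same inputs => same id
```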
data/lib/embulk/output/bigquery/gcs_client.rb
ADDED
```ruby
require 'uri'
require 'google/apis/storage_v1'
require_relative 'google_client'
require_relative 'helper'

# ToDo: Use https://cloud.google.com/storage/docs/streaming if google-api-ruby-client supports streaming transfers
# ToDo: Tests are not written because this implementation will probably be entirely changed on supporting streaming transfers
module Embulk
  module Output
    class Bigquery < OutputPlugin
      class GcsClient < GoogleClient
        def initialize(task)
          scope = "https://www.googleapis.com/auth/cloud-platform"
          client_class = Google::Apis::StorageV1::StorageService
          super(task, scope, client_class)

          @project = @task['project']
          @bucket = @task['gcs_bucket']
        end

        def insert_bucket(bucket = nil)
          bucket ||= @bucket
          begin
            Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@project}:#{bucket}" }
            body = {
              name: bucket,
            }
            opts = {}

            Embulk.logger.debug { "embulk-output-bigquery: insert_bucket(#{@project}, #{body}, #{opts})" }
            client.insert_bucket(@project, body, opts)
          rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
            if e.status_code == 409 && /conflict:/ =~ e.message
              # ignore 'Already Exists' error
              return nil
            end
            response = {status_code: e.status_code, message: e.message, error_class: e.class}
            Embulk.logger.error {
              "embulk-output-bigquery: insert_bucket(#{@project}, #{body}, #{opts}), response:#{response}"
            }
            raise Error, "failed to insert bucket #{@project}:#{bucket}, response:#{response}"
          end
        end

        def insert_object(path, object: nil, bucket: nil)
          bucket ||= @bucket
          object ||= path
          object = object.start_with?('/') ? object[1..-1] : object
          object_uri = URI.join("gs://#{bucket}", object).to_s

          started = Time.now
          begin
            Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@project}:#{object_uri}" }
            body = {
              name: object,
            }
            opts = {
              upload_source: path,
              content_type: 'application/octet-stream'
            }

            Embulk.logger.debug { "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts})" }
            # memo: gcs is strongly consistent for insert (read-after-write). ref: https://cloud.google.com/storage/docs/consistency
            client.insert_object(bucket, body, opts)
          rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
            response = {status_code: e.status_code, message: e.message, error_class: e.class}
            Embulk.logger.error {
              "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts}), response:#{response}"
            }
            raise Error, "failed to insert object #{@project}:#{object_uri}, response:#{response}"
          end
        end

        def insert_objects(paths, objects: nil, bucket: nil)
          return [] if paths.empty?
          bucket ||= @bucket
          objects ||= paths
          raise "number of paths and objects are different" if paths.size != objects.size

          responses = []
          paths.each_with_index do |path, idx|
            object = objects[idx]
            responses << insert_object(path, object: object, bucket: bucket)
          end
          responses
        end

        def delete_object(object, bucket: nil)
          bucket ||= @bucket
          object = object.start_with?('/') ? object[1..-1] : object
          object_uri = URI.join("gs://#{bucket}", object).to_s
          begin
            Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@project}:#{object_uri}" }
            opts = {}

            Embulk.logger.debug { "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts})" }
            response = client.delete_object(bucket, object, opts)
          rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
            if e.status_code == 404 # ignore 'notFound' error
              return nil
            end
            response = {status_code: e.status_code, message: e.message, error_class: e.class}
            Embulk.logger.error {
              "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts}), response:#{response}"
            }
            raise Error, "failed to delete object #{@project}:#{object_uri}, response:#{response}"
          end
        end
      end
    end
  end
end
```
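One small detail in `insert_object` and `delete_object` above: a leading `/` is stripped so the GCS object name and the derived gs:// URI stay consistent. A sketch with assumed values:

```ruby
require 'uri'

object = '/2016/06/part-0.gz' # a path-like name with a leading slash (assumed)
object = object.start_with?('/') ? object[1..-1] : object

puts object                                  # => 2016/06/part-0.gz
puts URI.join("gs://my-bucket", object).to_s # => gs://my-bucket/2016/06/part-0.gz
```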
data/lib/embulk/output/bigquery/google_client.rb
ADDED
```ruby
require 'google/api_client/auth/key_utils'

module Embulk
  module Output
    class Bigquery < OutputPlugin
      class Error < StandardError; end
      class JobTimeoutError < Error; end
      class NotFoundError < Error; end

      class GoogleClient
        def initialize(task, scope, client_class)
          @task = task
          @scope = scope
          @client_class = client_class
        end

        def client
          return @cached_client if @cached_client && @cached_client_expiration > Time.now

          client = @client_class.new
          client.client_options.application_name = @task['application_name']
          client.request_options.retries = @task['retries']
          client.request_options.timeout_sec = @task['timeout_sec']
          client.request_options.open_timeout_sec = @task['open_timeout_sec']
          Embulk.logger.debug { "embulk-output-bigquery: client_options: #{client.client_options.to_h}" }
          Embulk.logger.debug { "embulk-output-bigquery: request_options: #{client.request_options.to_h}" }

          case @task['auth_method']
          when 'private_key'
            private_key_passphrase = 'notasecret'
            key = Google::APIClient::KeyUtils.load_from_pkcs12(@task['p12_keyfile'], private_key_passphrase)
            auth = Signet::OAuth2::Client.new(
              token_credential_uri: "https://accounts.google.com/o/oauth2/token",
              audience: "https://accounts.google.com/o/oauth2/token",
              scope: @scope,
              issuer: @task['service_account_email'],
              signing_key: key)

          when 'compute_engine'
            auth = Google::Auth::GCECredentials.new

          when 'json_key'
            json_key = @task['json_keyfile']
            if File.exist?(json_key)
              auth = File.open(json_key) do |f|
                Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: @scope)
              end
            else
              key = StringIO.new(json_key)
              auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: @scope)
            end

          when 'application_default'
            auth = Google::Auth.get_application_default([@scope])

          else
            raise ConfigError, "Unknown auth method: #{@task['auth_method']}"
          end

          client.authorization = auth

          @cached_client_expiration = Time.now + 1800
          @cached_client = client
        end
      end
    end
  end
end
```
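`GoogleClient#client` above doubles as a 30-minute cache: calls within the window return the same authorized service object, and the first call after expiry rebuilds it. A stripped-down, runnable sketch of the pattern, with a stub standing in for the Google service class:

```ruby
# Minimal sketch of the caching in GoogleClient#client (stub object, names assumed).
class CachedClientSketch
  EXPIRY_SEC = 1800 # same 1800 seconds as above

  def client
    return @cached_client if @cached_client && @cached_client_expiration > Time.now

    @cached_client_expiration = Time.now + EXPIRY_SEC
    @cached_client = Object.new # stands in for @client_class.new plus authorization
  end
end

sketch = CachedClientSketch.new
puts sketch.client.equal?(sketch.client) # => true: cached within the expiry window
```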
data/test/test_transaction.rb
CHANGED
```diff
@@ -43,7 +43,6 @@ module Embulk
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
         mock(obj).create_table(config['temp_table'])
-        mock(obj).load_in_parallel(anything, config['temp_table']) { [] }
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
         mock(obj).delete_table(config['temp_table'])
       end
@@ -56,7 +55,6 @@ module Embulk
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
         mock(obj).get_table(config['table'])
-        mock(obj).load_in_parallel(anything, config['table']) { [] }
       end
       Bigquery.transaction(config, schema, processor_count, &control)
     end
@@ -66,7 +64,6 @@ module Embulk
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).create_dataset(config['dataset'])
         mock(obj).create_table(config['table'])
-        mock(obj).load_in_parallel(anything, config['table']) { [] }
       end
       Bigquery.transaction(config, schema, processor_count, &control)
     end
@@ -78,7 +75,6 @@ module Embulk
         mock(obj).get_dataset(config['dataset'])
         mock(obj).delete_table(config['table'])
         mock(obj).create_table(config['table'])
-        mock(obj).load_in_parallel(anything, config['table']) { [] }
       end
       Bigquery.transaction(config, schema, processor_count, &control)
     end
@@ -88,7 +84,6 @@ module Embulk
       any_instance_of(BigqueryClient) do |obj|
         mock(obj).get_dataset(config['dataset'])
         mock(obj).create_table(config['temp_table'])
-        mock(obj).load_in_parallel(anything, config['temp_table']) { [] }
         mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
         mock(obj).delete_table(config['temp_table'])
       end
@@ -102,7 +97,6 @@ module Embulk
         mock(obj).get_dataset(config['dataset'])
         mock(obj).get_dataset(config['dataset_old'])
         mock(obj).create_table(config['temp_table'])
-        mock(obj).load_in_parallel(anything, config['temp_table']) { [] }
 
         mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
@@ -118,7 +112,6 @@ module Embulk
         mock(obj).create_dataset(config['dataset'])
         mock(obj).create_dataset(config['dataset_old'], reference: config['dataset'])
         mock(obj).create_table(config['temp_table'])
-        mock(obj).load_in_parallel(anything, config['temp_table']) { [] }
 
         mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
```
metadata
CHANGED
```diff
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.3.3
+  version: 0.3.4
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-06-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: google-api-client
@@ -101,6 +101,7 @@ files:
 - example/config_csv.yml
 - example/config_delete_in_advance.yml
 - example/config_expose_errors.yml
+- example/config_gcs.yml
 - example/config_guess_from_embulk_schema.yml
 - example/config_guess_with_column_options.yml
 - example/config_gzip.yml
@@ -136,6 +137,8 @@ files:
 - lib/embulk/output/bigquery.rb
 - lib/embulk/output/bigquery/bigquery_client.rb
 - lib/embulk/output/bigquery/file_writer.rb
+- lib/embulk/output/bigquery/gcs_client.rb
+- lib/embulk/output/bigquery/google_client.rb
 - lib/embulk/output/bigquery/helper.rb
 - lib/embulk/output/bigquery/value_converter_factory.rb
 - test/helper.rb
```