embulk-output-bigquery 0.3.3 → 0.3.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 2784d92df0a5542880259e6d0cbdbdf4112576cf
-   data.tar.gz: 8d1a6bd77e56a49f3e303571b48c6b4b5f4f9ec5
+   metadata.gz: f68ceb57a4eff6886157c585425526389623d0a2
+   data.tar.gz: b44323059a3057bb5de7fdd7b00d61ce970f3386
  SHA512:
-   metadata.gz: a5c632416596681d347dcf26da31c0b930930c7cdb60080a1e1e77c590fc8743da5006f3e8b6edc1bccce50c5744872e83f33cb86c66b6b1c0d260747c9b8a09
-   data.tar.gz: 8d5ee5c5f4a63163c7955325aa82d71b9fb497182ce5eb264aa1ff84bf95c156936b860714da2ba1b69e07ffd5cd8b7375f153172f854bb00a1f153b9f0c2bd6
+   metadata.gz: 5cc7b1245bda2ae8c5d581c67a09ce0685c7812658c3c47e195362290fd50c13abfb7a3e9bb2360bc01a6d6aa82009ce190bef667cfb1df2cddaeb653c162c14
+   data.tar.gz: 4f8611f292a61750568c7b15e7ae6f83bc83d09ae3f64b2359b8f6f4e4d4b7ac115e09e6e8fbb5cc2e98b89103b8d1aba0640e0d89b035dbab2e5feea0d47449
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
+ ## 0.3.4 - 2016-06-01
+
+ * [new feature] Add `gcs_bucket` option to load multiple files from a GCS bucket with one load job
+
  ## 0.3.3 - 2016-05-24

  * [maintenance] Fix `private_key` auth is not working
data/README.md CHANGED
@@ -37,7 +37,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE

  | name | type | required? | default | description |
  |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
- | mode | string | optional | "append" | [See below](#mode) |
+ | mode | string | optional | "append" | See [Mode](#mode) |
  | auth_method | string | optional | "private_key" | `private_key` , `json_key` or `compute_engine`
  | service_account_email | string | required when auth_method is private_key | | Your Google service account email
  | p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
@@ -46,21 +46,23 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
  | dataset | string | required | | dataset |
  | table | string | required | | table name |
  | auto_create_dataset | boolean | optional | false | automatically create dataset |
- | auto_create_table | boolean | optional | false | [See below](#dynamic-table-creating) |
+ | auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
  | schema_file | string | optional | | /path/to/schema.json |
- | template_table | string | optional | | template table name [See below](#dynamic-table-creating) |
- | prevent_duplicate_insert | boolean | optional | false | [See below](#prevent-duplication) |
+ | template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
+ | prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
  | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
  | job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
  | is_skip_job_result_check | boolean | optional | false | Skip waiting Load job finishes. Available for append, or delete_in_advance mode |
  | with_rehearsal | boolean | optional | false | Load `rehearsal_counts` records as a rehearsal. Rehearsal loads into REHEARSAL temporary table, and delete finally. You may use this option to investigate data errors as early stage as possible |
  | rehearsal_counts | integer | optional | 1000 | Specify number of records to load in a rehearsal |
  | abort_on_error | boolean | optional | true if max_bad_records is 0, otherwise false | Raise an error if number of input rows and number of output rows does not match |
- | column_options | hash | optional | | [See below](#column-options) |
+ | column_options | hash | optional | | See [Column Options](#column-options) |
  | default_timezone | string | optional | UTC | |
  | default_timestamp_format | string | optional | %Y-%m-%d %H:%M:%S.%6N | |
- | payload_column | string | optional | nil | [See below](#formatter-performance-issue) |
- | payload_column_index | integer | optional | nil | [See below](#formatter-performance-issue) |
+ | payload_column | string | optional | nil | See [Formatter Performance Issue](#formatter-performance-issue) |
+ | payload_column_index | integer | optional | nil | See [Formatter Performance Issue](#formatter-performance-issue) |
+ | gcs_bucket | string | optional | nil | See [GCS Bucket](#gcs-bucket) |
+ | auto_create_gcs_bucket | boolean | optional | false | See [GCS Bucket](#gcs-bucket) |

  Client or request options

@@ -345,6 +347,25 @@ out:
  prevent_duplicate_insert: true
  ```

+ ### GCS Bucket
+
+ This is useful to reduce the number of consumed load jobs, which is limited to [10,000 jobs per project per day](https://cloud.google.com/bigquery/quota-policy#import).
+
+ By default, this plugin loads local files into BigQuery in parallel, that is, it consumes multiple jobs, for example 24 jobs on a 24-CPU-core machine (the exact number depends on Embulk parameters such as `min_output_tasks` and `max_threads`).
+
+ BigQuery supports loading multiple files from GCS with a single job (but not from local files, sigh), so uploading the local files to GCS first and then loading them from GCS into BigQuery reduces the number of consumed jobs.
+
+ The `gcs_bucket` option enables this strategy. You may also set `auto_create_gcs_bucket` to have the plugin create the specified GCS bucket automatically.
+
+ ```yaml
+ out:
+   type: bigquery
+   gcs_bucket: bucket_name
+   auto_create_gcs_bucket: false
+ ```
+
+ ToDo: use https://cloud.google.com/storage/docs/streaming once google-api-ruby-client supports streaming transfers into GCS.
+
  ## Development

  ### Run example:
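For reference, the strategy described in the GCS Bucket section above boils down to the following flow. This is a condensed sketch of the code added to lib/embulk/output/bigquery.rb later in this diff, not a separate API: `task`, `paths`, `bigquery`, and `target_table` are the plugin's own objects, shown here without the surrounding plugin boilerplate.

```ruby
# Sketch of the gcs_bucket code path; see lib/embulk/output/bigquery.rb below for the real thing.
bucket = task['gcs_bucket']
gcs = GcsClient.new(task)
gcs.insert_bucket(bucket) if task['auto_create_gcs_bucket']

# Upload each local intermediate file to GCS under a random object name,
objects = paths.size.times.map { SecureRandom.uuid.to_s }
gcs.insert_objects(paths, objects: objects, bucket: bucket)

# then load all uploaded objects with a single BigQuery load job and clean up.
object_uris = objects.map { |object| URI.join("gs://#{bucket}", object).to_s }
responses = bigquery.load_from_gcs(object_uris, target_table)
objects.each { |object| gcs.delete_object(object, bucket: bucket) }
```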
data/embulk-output-bigquery.gemspec CHANGED
@@ -1,6 +1,6 @@
  Gem::Specification.new do |spec|
    spec.name = "embulk-output-bigquery"
-   spec.version = "0.3.3"
+   spec.version = "0.3.4"
    spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
    spec.summary = "Google BigQuery output plugin for Embulk"
    spec.description = "Embulk plugin that insert records to Google BigQuery."
data/example/config_gcs.yml ADDED
@@ -0,0 +1,32 @@
+ in:
+   type: file
+   path_prefix: example/example.csv
+   parser:
+     type: csv
+     charset: UTF-8
+     newline: CRLF
+     null_string: 'NULL'
+     skip_header_lines: 1
+     comment_line_marker: '#'
+     columns:
+       - {name: date, type: string}
+       - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+       - {name: "null", type: string}
+       - {name: long, type: long}
+       - {name: string, type: string}
+       - {name: double, type: double}
+       - {name: boolean, type: boolean}
+ out:
+   type: bigquery
+   mode: replace
+   auth_method: json_key
+   json_keyfile: example/your-project-000.json
+   dataset: your_dataset_name
+   table: your_table_name
+   source_format: NEWLINE_DELIMITED_JSON
+   compression: GZIP
+   auto_create_dataset: true
+   auto_create_table: true
+   schema_file: example/schema.json
+   gcs_bucket: your_bucket_name
+   auto_create_gcs_bucket: true
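Assuming real credentials in `example/your-project-000.json` and existing project, dataset, and bucket names, this example should run like the other bundled examples (for instance `embulk run example/config_gcs.yml` once the plugin is installed): the formatted files are uploaded to `your_bucket_name`, loaded into BigQuery with a single load job, and the temporary GCS objects are deleted afterwards.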
data/lib/embulk/output/bigquery.rb CHANGED
@@ -1,8 +1,10 @@
+ require 'uri'
  require 'json'
  require 'tempfile'
  require 'fileutils'
  require 'securerandom'
  require_relative 'bigquery/bigquery_client'
+ require_relative 'bigquery/gcs_client'
  require_relative 'bigquery/file_writer'
  require_relative 'bigquery/value_converter_factory'

@@ -73,6 +75,9 @@ module Embulk
            'skip_file_generation' => config.param('skip_file_generation', :bool, :default => false),
            'compression' => config.param('compression', :string, :default => 'NONE'),

+           'gcs_bucket' => config.param('gcs_bucket', :string, :default => nil),
+           'auto_create_gcs_bucket' => config.param('auto_create_gcs_bucket', :bool, :default => false),
+
            'source_format' => config.param('source_format', :string, :default => 'CSV'),
            'max_bad_records' => config.param('max_bad_records', :integer, :default => 0),
            'field_delimiter' => config.param('field_delimiter', :string, :default => ','),
@@ -312,8 +317,22 @@ module Embulk
          if task['skip_load'] # only for debug
            Embulk.logger.info { "embulk-output-bigquery: Skip load" }
          else
-           target_table = task['temp_table'] ? task['temp_table'] : task['table']
-           responses = bigquery.load_in_parallel(paths, target_table)
+           if !paths.empty?
+             target_table = task['temp_table'] ? task['temp_table'] : task['table']
+             if bucket = task['gcs_bucket']
+               gcs = GcsClient.new(task)
+               gcs.insert_bucket(bucket) if task['auto_create_gcs_bucket']
+               objects = paths.size.times.map { SecureRandom.uuid.to_s }
+               gcs.insert_objects(paths, objects: objects, bucket: bucket)
+               object_uris = objects.map {|object| URI.join("gs://#{bucket}", object).to_s }
+               responses = bigquery.load_from_gcs(object_uris, target_table)
+               objects.each {|object| gcs.delete_object(object, bucket: bucket) }
+             else
+               responses = bigquery.load_in_parallel(paths, target_table)
+             end
+           else
+             responses = []
+           end
            transaction_report = self.transaction_report(task, responses)
            Embulk.logger.info { "embulk-output-bigquery: transaction_report: #{transaction_report.to_json}" }

data/lib/embulk/output/bigquery/bigquery_client.rb CHANGED
@@ -1,76 +1,22 @@
  require 'google/apis/bigquery_v2'
- require 'google/api_client/auth/key_utils'
  require 'json'
  require 'thwait'
+ require_relative 'google_client'
  require_relative 'helper'

  module Embulk
    module Output
      class Bigquery < OutputPlugin
-       class Error < StandardError; end
-       class JobTimeoutError < Error; end
-       class NotFoundError < Error; end
-
-       class BigqueryClient
+       class BigqueryClient < GoogleClient
          def initialize(task, schema, fields = nil)
-           @task = task
-           @schema = schema
-
-           @project = task['project']
-           @dataset = task['dataset']
-
-           reset_fields(fields) if fields
-         end
-
-         def client
-           return @cached_client if @cached_client && @cached_client_expiration > Time.now
-
-           client = Google::Apis::BigqueryV2::BigqueryService.new
-           client.client_options.application_name = @task['application_name']
-           client.request_options.retries = @task['retries']
-           client.request_options.timeout_sec = @task['timeout_sec']
-           client.request_options.open_timeout_sec = @task['open_timeout_sec']
-           Embulk.logger.debug { "embulk-output-bigquery: client_options: #{client.client_options.to_h}" }
-           Embulk.logger.debug { "embulk-output-bigquery: request_options: #{client.request_options.to_h}" }
-
            scope = "https://www.googleapis.com/auth/bigquery"
+           client_class = Google::Apis::BigqueryV2::BigqueryService
+           super(task, scope, client_class)

-           case @task['auth_method']
-           when 'private_key'
-             private_key_passphrase = 'notasecret'
-             key = Google::APIClient::KeyUtils.load_from_pkcs12(@task['p12_keyfile'], private_key_passphrase)
-             auth = Signet::OAuth2::Client.new(
-               token_credential_uri: "https://accounts.google.com/o/oauth2/token",
-               audience: "https://accounts.google.com/o/oauth2/token",
-               scope: scope,
-               issuer: @task['service_account_email'],
-               signing_key: key)
-
-           when 'compute_engine'
-             auth = Google::Auth::GCECredentials.new
-
-           when 'json_key'
-             json_key = @task['json_keyfile']
-             if File.exist?(json_key)
-               auth = File.open(json_key) do |f|
-                 Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: scope)
-               end
-             else
-               key = StringIO.new(json_key)
-               auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
-             end
-
-           when 'application_default'
-             auth = Google::Auth.get_application_default([scope])
-
-           else
-             raise ConfigError, "Unknown auth method: #{@task['auth_method']}"
-           end
-
-           client.authorization = auth
-
-           @cached_client_expiration = Time.now + 1800
-           @cached_client = client
+           @schema = schema
+           reset_fields(fields) if fields
+           @project = @task['project']
+           @dataset = @task['dataset']
          end

          def fields
@@ -94,6 +40,62 @@ module Embulk
            self.fields
          end

+         # @param object_uris [Array] array of GCS object URIs such as gs://bucket/path
+         # @return [Array] responses
+         def load_from_gcs(object_uris, table)
+           begin
+             # As https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs says,
+             # we should generate job_id in client code, otherwise retrying would cause duplication
+             if @task['prevent_duplicate_insert'] and (@task['mode'] == 'append' or @task['mode'] == 'append_direct')
+               job_id = Helper.create_load_job_id(@task, path, fields)
+             else
+               job_id = "embulk_load_job_#{SecureRandom.uuid}"
+             end
+             Embulk.logger.info { "embulk-output-bigquery: Load job starting... job_id:[#{job_id}] #{object_uris} => #{@project}:#{@dataset}.#{table}" }
+
+             body = {
+               job_reference: {
+                 project_id: @project,
+                 job_id: job_id,
+               },
+               configuration: {
+                 load: {
+                   destination_table: {
+                     project_id: @project,
+                     dataset_id: @dataset,
+                     table_id: table,
+                   },
+                   schema: {
+                     fields: fields,
+                   },
+                   write_disposition: 'WRITE_APPEND',
+                   source_format: @task['source_format'],
+                   max_bad_records: @task['max_bad_records'],
+                   field_delimiter: @task['source_format'] == 'CSV' ? @task['field_delimiter'] : nil,
+                   encoding: @task['encoding'],
+                   ignore_unknown_values: @task['ignore_unknown_values'],
+                   allow_quoted_newlines: @task['allow_quoted_newlines'],
+                   source_uris: object_uris,
+                 }
+               }
+             }
+             opts = {}
+
+             Embulk.logger.debug { "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts})" }
+             response = client.insert_job(@project, body, opts)
+             unless @task['is_skip_job_result_check']
+               response = wait_load('Load', response)
+             end
+             [response]
+           rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+             response = {status_code: e.status_code, message: e.message, error_class: e.class}
+             Embulk.logger.error {
+               "embulk-output-bigquery: insert_job(#{@project}, #{body}, #{opts}), response:#{response}"
+             }
+             raise Error, "failed to load #{object_uris} to #{@project}:#{@dataset}.#{table}, response:#{response}"
+           end
+         end
+
          def load_in_parallel(paths, table)
            return [] if paths.empty?
            # You may think as, load job is a background job, so sending requests in parallel
@@ -118,7 +120,7 @@ module Embulk
            end
            ThreadsWait.all_waits(*threads) do |th|
              idx, response = th.value # raise errors occurred in threads
-             responses[idx] = response
+             responses[idx] = response if idx
            end
            responses
          end
data/lib/embulk/output/bigquery/gcs_client.rb ADDED
@@ -0,0 +1,112 @@
+ require 'uri'
+ require 'google/apis/storage_v1'
+ require_relative 'google_client'
+ require_relative 'helper'
+
+ # ToDo: Use https://cloud.google.com/storage/docs/streaming if google-api-ruby-client supports streaming transfers
+ # ToDo: Tests are not written because this implementation will probably be changed entirely once streaming transfers are supported
+ module Embulk
+   module Output
+     class Bigquery < OutputPlugin
+       class GcsClient < GoogleClient
+         def initialize(task)
+           scope = "https://www.googleapis.com/auth/cloud-platform"
+           client_class = Google::Apis::StorageV1::StorageService
+           super(task, scope, client_class)
+
+           @project = @task['project']
+           @bucket = @task['gcs_bucket']
+         end
+
+         def insert_bucket(bucket = nil)
+           bucket ||= @bucket
+           begin
+             Embulk.logger.info { "embulk-output-bigquery: Insert bucket... #{@project}:#{bucket}" }
+             body = {
+               name: bucket,
+             }
+             opts = {}
+
+             Embulk.logger.debug { "embulk-output-bigquery: insert_bucket(#{@project}, #{body}, #{opts})" }
+             client.insert_bucket(@project, body, opts)
+           rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+             if e.status_code == 409 && /conflict:/ =~ e.message
+               # ignore 'Already Exists' error
+               return nil
+             end
+             response = {status_code: e.status_code, message: e.message, error_class: e.class}
+             Embulk.logger.error {
+               "embulk-output-bigquery: insert_bucket(#{@project}, #{body}, #{opts}), response:#{response}"
+             }
+             raise Error, "failed to insert bucket #{@project}:#{bucket}, response:#{response}"
+           end
+         end
+
+         def insert_object(path, object: nil, bucket: nil)
+           bucket ||= @bucket
+           object ||= path
+           object = object.start_with?('/') ? object[1..-1] : object
+           object_uri = URI.join("gs://#{bucket}", object).to_s
+
+           started = Time.now
+           begin
+             Embulk.logger.info { "embulk-output-bigquery: Insert object... #{path} => #{@project}:#{object_uri}" }
+             body = {
+               name: object,
+             }
+             opts = {
+               upload_source: path,
+               content_type: 'application/octet-stream'
+             }
+
+             Embulk.logger.debug { "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts})" }
+             # memo: gcs is strongly consistent for insert (read-after-write). ref: https://cloud.google.com/storage/docs/consistency
+             client.insert_object(bucket, body, opts)
+           rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+             response = {status_code: e.status_code, message: e.message, error_class: e.class}
+             Embulk.logger.error {
+               "embulk-output-bigquery: insert_object(#{bucket}, #{body}, #{opts}), response:#{response}"
+             }
+             raise Error, "failed to insert object #{@project}:#{object_uri}, response:#{response}"
+           end
+         end
+
+         def insert_objects(paths, objects: nil, bucket: nil)
+           return [] if paths.empty?
+           bucket ||= @bucket
+           objects ||= paths
+           raise "number of paths and objects are different" if paths.size != objects.size
+
+           responses = []
+           paths.each_with_index do |path, idx|
+             object = objects[idx]
+             responses << insert_object(path, object: object, bucket: bucket)
+           end
+           responses
+         end
+
+         def delete_object(object, bucket: nil)
+           bucket ||= @bucket
+           object = object.start_with?('/') ? object[1..-1] : object
+           object_uri = URI.join("gs://#{bucket}", object).to_s
+           begin
+             Embulk.logger.info { "embulk-output-bigquery: Delete object... #{@project}:#{object_uri}" }
+             opts = {}
+
+             Embulk.logger.debug { "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts})" }
+             response = client.delete_object(bucket, object, opts)
+           rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+             if e.status_code == 404 # ignore 'notFound' error
+               return nil
+             end
+             response = {status_code: e.status_code, message: e.message, error_class: e.class}
+             Embulk.logger.error {
+               "embulk-output-bigquery: delete_object(#{bucket}, #{object}, #{opts}), response:#{response}"
+             }
+             raise Error, "failed to delete object #{@project}:#{object_uri}, response:#{response}"
+           end
+         end
+       end
+     end
+   end
+ end
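For orientation, this is roughly how the plugin drives the new client, mirroring the calls made from lib/embulk/output/bigquery.rb above. The task values shown here are illustrative placeholders, and the client only works inside an Embulk run where the plugin and its Google API dependencies are loaded.

```ruby
require 'securerandom'
require 'uri'

# Illustrative task values; in practice this hash is built by the plugin from the Embulk config.
task = {
  'project' => 'your-project', 'gcs_bucket' => 'your_bucket_name',
  'auth_method' => 'json_key', 'json_keyfile' => 'example/your-project-000.json',
  'application_name' => 'Embulk BigQuery plugin',
  'retries' => 5, 'timeout_sec' => 300, 'open_timeout_sec' => 300,
}

gcs = Embulk::Output::Bigquery::GcsClient.new(task)
gcs.insert_bucket                              # 409 "conflict" (bucket already exists) is ignored
object = SecureRandom.uuid.to_s                # objects get random names, not the local file names
gcs.insert_object('/tmp/file.00000.jsonl.gz', object: object)
object_uri = URI.join("gs://#{task['gcs_bucket']}", object).to_s
# object_uri is what gets handed to BigqueryClient#load_from_gcs as a source URI.
gcs.delete_object(object)                      # 404 "not found" is ignored
```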
data/lib/embulk/output/bigquery/google_client.rb ADDED
@@ -0,0 +1,68 @@
+ require 'google/api_client/auth/key_utils'
+
+ module Embulk
+   module Output
+     class Bigquery < OutputPlugin
+       class Error < StandardError; end
+       class JobTimeoutError < Error; end
+       class NotFoundError < Error; end
+
+       class GoogleClient
+         def initialize(task, scope, client_class)
+           @task = task
+           @scope = scope
+           @client_class = client_class
+         end
+
+         def client
+           return @cached_client if @cached_client && @cached_client_expiration > Time.now
+
+           client = @client_class.new
+           client.client_options.application_name = @task['application_name']
+           client.request_options.retries = @task['retries']
+           client.request_options.timeout_sec = @task['timeout_sec']
+           client.request_options.open_timeout_sec = @task['open_timeout_sec']
+           Embulk.logger.debug { "embulk-output-bigquery: client_options: #{client.client_options.to_h}" }
+           Embulk.logger.debug { "embulk-output-bigquery: request_options: #{client.request_options.to_h}" }
+
+           case @task['auth_method']
+           when 'private_key'
+             private_key_passphrase = 'notasecret'
+             key = Google::APIClient::KeyUtils.load_from_pkcs12(@task['p12_keyfile'], private_key_passphrase)
+             auth = Signet::OAuth2::Client.new(
+               token_credential_uri: "https://accounts.google.com/o/oauth2/token",
+               audience: "https://accounts.google.com/o/oauth2/token",
+               scope: @scope,
+               issuer: @task['service_account_email'],
+               signing_key: key)
+
+           when 'compute_engine'
+             auth = Google::Auth::GCECredentials.new
+
+           when 'json_key'
+             json_key = @task['json_keyfile']
+             if File.exist?(json_key)
+               auth = File.open(json_key) do |f|
+                 Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: @scope)
+               end
+             else
+               key = StringIO.new(json_key)
+               auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: @scope)
+             end
+
+           when 'application_default'
+             auth = Google::Auth.get_application_default([@scope])
+
+           else
+             raise ConfigError, "Unknown auth method: #{@task['auth_method']}"
+           end
+
+           client.authorization = auth
+
+           @cached_client_expiration = Time.now + 1800
+           @cached_client = client
+         end
+       end
+     end
+   end
+ end
@@ -43,7 +43,6 @@ module Embulk
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).get_dataset(config['dataset'])
          mock(obj).create_table(config['temp_table'])
-         mock(obj).load_in_parallel(anything, config['temp_table']) { [] }
          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
          mock(obj).delete_table(config['temp_table'])
        end
@@ -56,7 +55,6 @@ module Embulk
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).get_dataset(config['dataset'])
          mock(obj).get_table(config['table'])
-         mock(obj).load_in_parallel(anything, config['table']) { [] }
        end
        Bigquery.transaction(config, schema, processor_count, &control)
      end
@@ -66,7 +64,6 @@ module Embulk
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).create_dataset(config['dataset'])
          mock(obj).create_table(config['table'])
-         mock(obj).load_in_parallel(anything, config['table']) { [] }
        end
        Bigquery.transaction(config, schema, processor_count, &control)
      end
@@ -78,7 +75,6 @@ module Embulk
          mock(obj).get_dataset(config['dataset'])
          mock(obj).delete_table(config['table'])
          mock(obj).create_table(config['table'])
-         mock(obj).load_in_parallel(anything, config['table']) { [] }
        end
        Bigquery.transaction(config, schema, processor_count, &control)
      end
@@ -88,7 +84,6 @@ module Embulk
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).get_dataset(config['dataset'])
          mock(obj).create_table(config['temp_table'])
-         mock(obj).load_in_parallel(anything, config['temp_table']) { [] }
          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
          mock(obj).delete_table(config['temp_table'])
        end
@@ -102,7 +97,6 @@ module Embulk
          mock(obj).get_dataset(config['dataset'])
          mock(obj).get_dataset(config['dataset_old'])
          mock(obj).create_table(config['temp_table'])
-         mock(obj).load_in_parallel(anything, config['temp_table']) { [] }

          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])

@@ -118,7 +112,6 @@ module Embulk
          mock(obj).create_dataset(config['dataset'])
          mock(obj).create_dataset(config['dataset_old'], reference: config['dataset'])
          mock(obj).create_table(config['temp_table'])
-         mock(obj).load_in_parallel(anything, config['temp_table']) { [] }

          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])

metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: embulk-output-bigquery
  version: !ruby/object:Gem::Version
-   version: 0.3.3
+   version: 0.3.4
  platform: ruby
  authors:
  - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2016-05-24 00:00:00.000000000 Z
+ date: 2016-06-01 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: google-api-client
@@ -101,6 +101,7 @@ files:
  - example/config_csv.yml
  - example/config_delete_in_advance.yml
  - example/config_expose_errors.yml
+ - example/config_gcs.yml
  - example/config_guess_from_embulk_schema.yml
  - example/config_guess_with_column_options.yml
  - example/config_gzip.yml
@@ -136,6 +137,8 @@ files:
  - lib/embulk/output/bigquery.rb
  - lib/embulk/output/bigquery/bigquery_client.rb
  - lib/embulk/output/bigquery/file_writer.rb
+ - lib/embulk/output/bigquery/gcs_client.rb
+ - lib/embulk/output/bigquery/google_client.rb
  - lib/embulk/output/bigquery/helper.rb
  - lib/embulk/output/bigquery/value_converter_factory.rb
  - test/helper.rb