embulk-output-bigquery 0.5.0 → 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +29 -4
  3. data/README.md +71 -42
  4. data/embulk-output-bigquery.gemspec +10 -3
  5. data/lib/embulk/output/bigquery.rb +11 -20
  6. data/lib/embulk/output/bigquery/auth.rb +35 -0
  7. data/lib/embulk/output/bigquery/google_client.rb +3 -34
  8. data/lib/embulk/output/bigquery/value_converter_factory.rb +31 -0
  9. data/test/test_bigquery_client.rb +1 -5
  10. data/test/test_configure.rb +4 -12
  11. data/test/test_helper.rb +7 -1
  12. data/test/test_transaction.rb +5 -6
  13. data/test/test_value_converter_factory.rb +86 -0
  14. metadata +29 -51
  15. data/example/config_append_direct_schema_update_options.yml +0 -31
  16. data/example/config_client_options.yml +0 -33
  17. data/example/config_csv.yml +0 -30
  18. data/example/config_delete_in_advance.yml +0 -29
  19. data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
  20. data/example/config_delete_in_advance_partitioned_table.yml +0 -33
  21. data/example/config_expose_errors.yml +0 -30
  22. data/example/config_gcs.yml +0 -32
  23. data/example/config_guess_from_embulk_schema.yml +0 -29
  24. data/example/config_guess_with_column_options.yml +0 -40
  25. data/example/config_gzip.yml +0 -1
  26. data/example/config_jsonl.yml +0 -1
  27. data/example/config_max_threads.yml +0 -34
  28. data/example/config_min_ouput_tasks.yml +0 -34
  29. data/example/config_mode_append.yml +0 -30
  30. data/example/config_mode_append_direct.yml +0 -30
  31. data/example/config_nested_record.yml +0 -1
  32. data/example/config_payload_column.yml +0 -20
  33. data/example/config_payload_column_index.yml +0 -20
  34. data/example/config_progress_log_interval.yml +0 -31
  35. data/example/config_replace.yml +0 -30
  36. data/example/config_replace_backup.yml +0 -32
  37. data/example/config_replace_backup_field_partitioned_table.yml +0 -34
  38. data/example/config_replace_backup_partitioned_table.yml +0 -34
  39. data/example/config_replace_field_partitioned_table.yml +0 -33
  40. data/example/config_replace_partitioned_table.yml +0 -33
  41. data/example/config_replace_schema_update_options.yml +0 -33
  42. data/example/config_skip_file_generation.yml +0 -32
  43. data/example/config_table_strftime.yml +0 -30
  44. data/example/config_template_table.yml +0 -21
  45. data/example/config_uncompressed.yml +0 -1
  46. data/example/config_with_rehearsal.yml +0 -33
  47. data/example/example.csv +0 -17
  48. data/example/example.yml +0 -1
  49. data/example/example2_1.csv +0 -1
  50. data/example/example2_2.csv +0 -1
  51. data/example/example4_1.csv +0 -1
  52. data/example/example4_2.csv +0 -1
  53. data/example/example4_3.csv +0 -1
  54. data/example/example4_4.csv +0 -1
  55. data/example/json_key.json +0 -12
  56. data/example/nested_example.jsonl +0 -16
  57. data/example/schema.json +0 -30
  58. data/example/schema_expose_errors.json +0 -30
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 3e0087103039718cb24224b6bb793d820b53b935194d412e4b2984aba3d7d7a8
4
- data.tar.gz: 9ac27a3b881277450cbfaa096de0690c721a8f86f0e78abb692c8a4ed5b679d5
2
+ SHA1:
3
+ metadata.gz: 7ea0cf04e91a092e3d97bf3e46d1c181aab4943f
4
+ data.tar.gz: 60e970acbc16128189df8c274a832d23160e4c80
5
5
  SHA512:
6
- metadata.gz: 6b0ccf4e349a5d15321cfcc97138a98676bddfd412fd6fadfc8b1e0d6cd31d9739a8a5f46ccd923644543ae43cc0134b3e7598f80d89c330a4ac8aec49c084c1
7
- data.tar.gz: f02557cdd7956620ae59eb6bc0e5872992d20a65881bd69230b0b0442342a36203d1eedd8a20702d2000f412b909359657bfa300b3e82b5f494398ea6e5ea301
6
+ metadata.gz: 14fb288ad9781515a28cf72869ca7e76081202b8cf7e29d2448bb9ec37e6a8e8e73a046930f53c41785551b354a990ccc8ee2f9298b418de672c6dfaa2e6447b
7
+ data.tar.gz: 9f8f59c89cf7cc9974ab8a287ffbf522263ff5e81ff988de95f22c9086cf805d39d8f9c7c7d41310357464a29e5ec0404fe819b5ba41de36eb0d870c4ca31144
data/CHANGELOG.md CHANGED
@@ -1,9 +1,34 @@
1
+ ## 0.6.4 - 2019-11-06
2
+
3
+ * [enhancement] Add DATETIME type converter (thanks to @kekekenta)
4
+
5
+ ## 0.6.3 - 2019-10-28
6
+
7
+ * [enhancement] Add DATE type converter (thanks to @tksfjt1024)
8
+
9
+ ## 0.6.2 - 2019-10-16
10
+
11
+ * [maintenance] Lock signet and google-api-client version (thanks to @hiroyuki-sato)
12
+
13
+ ## 0.6.1 - 2019-08-28
14
+
15
+ * [maintenance] Release a new gem not to include symlinks to make it work on Windows.
16
+
17
+ ## 0.6.0 - 2019-08-11
18
+
19
+ Cleanup `auth_method`:
20
+
21
+ * [enhancement] Support `auth_method: authorized_user` (OAuth)
22
+ * [incompatibility change] Rename `auth_method: json_key` to `auth_method: service_account` (`json_key` is kept for backward compatibility)
23
+ * [incompatibility change] Remove deprecated `auth_method: private_key` (p12 key)
24
+ * [incompatibility change] Change the default `auth_method` to `application_default` from `private_key` because `private_key` was dropped.
25
+
1
26
  ## 0.5.0 - 2019-08-10
2
27
 
3
- * [incompatibility change] Drop deprecated time\_partitioning.require\_partition\_filter
4
- * [incompatibility change] Drop prevent\_duplicate\_insert which has no use-case now
5
- * [incompatibility change] Change default value of `auto\_create\_table` to `true` from `false`
6
- * Modes `replace`, `replace_backup`, `append`, `delete_in_advance`, that is, except `append_direct` requires `auto_create_table: true`.
28
+ * [incompatibility change] Drop deprecated `time_partitioning`.`require_partition_filter`
29
+ * [incompatibility change] Drop `prevent_duplicate_insert` which has no use-case now
30
+ * [incompatibility change] Modes `replace`, `replace_backup`, `append`, and `delete_in_advance` require `auto_create_table: true` now because, previously, these modes had created a target table even with `auto_create_table: false` and made users being confused. Note that `auto_create_table: true` is always required even for a partition (a table name with a partition decorator) which may not require creating a table. This is for simplicity of logics and implementations.
31
+ * [incompatibility change] Change default value of `auto_create_table` to `true` because the above 4 modes, that is, except `append_direct` always require `auto_create_table: true` now.
7
32
 
8
33
  ## 0.4.14 - 2019-08-10
9
34
 
data/README.md CHANGED
@@ -29,17 +29,15 @@ OAuth flow for installed applications.
29
29
 
30
30
  | name | type | required? | default | description |
31
31
  |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
32
- | mode | string | optional | "append" | See [Mode](#mode) |
33
- | auth_method | string | optional | "private_key" | `private_key` , `json_key` or `compute_engine`
34
- | service_account_email | string | required when auth_method is private_key | | Your Google service account email
35
- | p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
36
- | json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
37
- | project | string | required if json_keyfile is not given | | project_id |
32
+ | mode | string | optional | "append" | See [Mode](#mode) |
33
+ | auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
34
+ | json_keyfile | string | optional | | keyfile path or `content` |
35
+ | project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
38
36
  | dataset | string | required | | dataset |
39
37
  | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
40
38
  | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
41
39
  | auto_create_dataset | boolean | optional | false | automatically create dataset |
42
- | auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes requires `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
40
+ | auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes require `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
43
41
  | schema_file | string | optional | | /path/to/schema.json |
44
42
  | template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
45
43
  | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
@@ -108,9 +106,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
108
106
  out:
109
107
  type: bigquery
110
108
  mode: append
111
- auth_method: private_key # default
112
- service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
113
- p12_keyfile: /path/to/p12_keyfile.p12
109
+ auth_method: service_account
110
+ json_keyfile: /path/to/json_keyfile.json
114
111
  project: your-project-000
115
112
  dataset: your_dataset_name
116
113
  table: your_table_name
@@ -118,7 +115,7 @@ out:
118
115
  source_format: NEWLINE_DELIMITED_JSON
119
116
  ```
120
117
 
121
- ### location
118
+ ### Location
122
119
 
123
120
  The geographic location of the dataset. Required except for US and EU.
124
121
 
@@ -126,7 +123,7 @@ GCS bucket should be in same region when you use `gcs_bucket`.
126
123
 
127
124
  See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
128
125
 
129
- ### mode
126
+ ### Mode
130
127
 
131
128
  5 modes are provided.
132
129
 
@@ -165,53 +162,69 @@ NOTE: BigQuery does not support replacing (actually, copying into) a non-partiti
165
162
 
166
163
  ### Authentication
167
164
 
168
- There are three methods supported to fetch access token for the service account.
165
+ There are four authentication methods:
166
+
167
+ 1. `service_account` (or `json_key` for backward compatibility)
168
+ 1. `authorized_user`
169
+ 1. `compute_engine`
170
+ 1. `application_default`
169
171
 
170
- 1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
171
- 2. JSON key of GCP(Google Cloud Platform)'s service account
172
- 3. Pre-defined access token (Google Compute Engine only)
172
+ #### service\_account (or json\_key)
173
173
 
174
- #### Public-Private key pair of GCP's service account
174
+ Use GCP service account credentials.
175
+ You first need to create a service account, download its json key and deploy the key with embulk.
176
+
177
+ ```yaml
178
+ out:
179
+ type: bigquery
180
+ auth_method: service_account
181
+ json_keyfile: /path/to/json_keyfile.json
182
+ ```
175
183
 
176
- You first need to create a service account (client ID),
177
- download its private key and deploy the key with embulk.
184
+ You can also embed contents of `json_keyfile` at config.yml.
178
185
 
179
186
  ```yaml
180
187
  out:
181
188
  type: bigquery
182
- auth_method: private_key # default
183
- service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
184
- p12_keyfile: /path/to/p12_keyfile.p12
189
+ auth_method: service_account
190
+ json_keyfile:
191
+ content: |
192
+ {
193
+ "private_key_id": "123456789",
194
+ "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
195
+ "client_email": "..."
196
+ }
185
197
  ```
186
198
 
187
- #### JSON key of GCP's service account
199
+ #### authorized\_user
188
200
 
189
- You first need to create a service account (client ID),
190
- download its json key and deploy the key with embulk.
201
+ Use Google user credentials.
202
+ You can get your credentials at `~/.config/gcloud/application_default_credentials.json` by running `gcloud auth login`.
191
203
 
192
204
  ```yaml
193
205
  out:
194
206
  type: bigquery
195
- auth_method: json_key
196
- json_keyfile: /path/to/json_keyfile.json
207
+ auth_method: authorized_user
208
+ json_keyfile: /path/to/credentials.json
197
209
  ```
198
210
 
199
- You can also embed contents of json_keyfile at config.yml.
211
+ You can also embed contents of `json_keyfile` at config.yml.
200
212
 
201
213
  ```yaml
202
214
  out:
203
215
  type: bigquery
204
- auth_method: json_key
216
+ auth_method: authorized_user
205
217
  json_keyfile:
206
218
  content: |
207
219
  {
208
- "private_key_id": "123456789",
209
- "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
210
- "client_email": "..."
211
- }
220
+ "client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
221
+ "client_secret":"xxxxxxxxxxx",
222
+ "refresh_token":"xxxxxxxxxxx",
223
+ "type":"authorized_user"
224
+ }
212
225
  ```
213
226
 
214
- #### Pre-defined access token(GCE only)
227
+ #### compute\_engine
215
228
 
216
229
  On the other hand, you don't need to explicitly create a service account for embulk when you
217
230
  run embulk in Google Compute Engine. In this third authentication method, you need to
@@ -224,6 +237,22 @@ out:
224
237
  auth_method: compute_engine
225
238
  ```
226
239
 
240
+ #### application\_default
241
+
242
+ Use Application Default Credentials (ADC). ADC is a strategy to locate Google Cloud Service Account credentials.
243
+
244
+ 1. ADC checks to see if the environment variable `GOOGLE_APPLICATION_CREDENTIALS` is set. If the variable is set, ADC uses the service account file that the variable points to.
245
+ 2. ADC checks to see if `~/.config/gcloud/application_default_credentials.json` is located. This file is created by running `gcloud auth application-default login`.
246
+ 3. Use the default service account for credentials if the application is running on Compute Engine, App Engine, Kubernetes Engine, Cloud Functions or Cloud Run.
247
+
248
+ See https://cloud.google.com/docs/authentication/production for details.
249
+
250
+ ```yaml
251
+ out:
252
+ type: bigquery
253
+ auth_method: application_default
254
+ ```
255
+
227
256
  ### Table id formatting
228
257
 
229
258
  `table` and option accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
@@ -232,12 +261,12 @@ Table ids are formatted at runtime
232
261
  using the local time of the embulk server.
233
262
 
234
263
  For example, with the configuration below,
235
- data is inserted into tables `table_2015_04`, `table_2015_05` and so on.
264
+ data is inserted into tables `table_20150503`, `table_20150504` and so on.
236
265
 
237
266
  ```yaml
238
267
  out:
239
268
  type: bigquery
240
- table: table_%Y_%m
269
+ table: table_%Y%m%d
241
270
  ```
242
271
 
243
272
  ### Dynamic table creating
@@ -252,7 +281,7 @@ Please set file path of schema.json.
252
281
  out:
253
282
  type: bigquery
254
283
  auto_create_table: true
255
- table: table_%Y_%m
284
+ table: table_%Y%m%d
256
285
  schema_file: /path/to/schema.json
257
286
  ```
258
287
 
@@ -264,7 +293,7 @@ Plugin will try to read schema from existing table and use it as schema template
264
293
  out:
265
294
  type: bigquery
266
295
  auto_create_table: true
267
- table: table_%Y_%m
296
+ table: table_%Y%m%d
268
297
  template_table: existing_table_name
269
298
  ```
270
299
 
@@ -278,17 +307,17 @@ Column options are used to aid guessing BigQuery schema, or to define conversion
278
307
 
279
308
  - **column_options**: advanced: an array of options for columns
280
309
  - **name**: column name
281
- - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, and `RECORD`. See belows for supported conversion type.
310
+ - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, and `RECORD`. See below for supported conversion types.
282
311
  - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
283
312
  - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
284
313
  - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
285
- - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `RECORD` (default: `STRING`)
286
- - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `TIMESTAMP`)
314
+ - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, `RECORD` (default: `STRING`)
315
+ - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE` (default: `TIMESTAMP`)
287
316
  - json: `STRING`, `RECORD` (default: `STRING`)
288
317
  - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
289
318
  - **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
290
319
  - **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
291
- - **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`).
320
+ - **timezone**: timezone to convert into/from `timestamp`, `date` (string, default is `default_timezone`).
292
321
  - **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
293
322
  - **default_timezone**: default timezone for column_options (string, default is "UTC")
294
323
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-bigquery"
3
- spec.version = "0.5.0"
3
+ spec.version = "0.6.4"
4
4
  spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
5
5
  spec.summary = "Google BigQuery output plugin for Embulk"
6
6
  spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -8,11 +8,18 @@ Gem::Specification.new do |spec|
8
8
  spec.licenses = ["MIT"]
9
9
  spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
10
10
 
11
- spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
11
+ # Exclude example directory which uses symlinks from generating gem.
12
+ # Symlinks do not work properly on the Windows platform without administrator privilege.
13
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] - Dir["example/*" ]
12
14
  spec.test_files = spec.files.grep(%r{^(test|spec)/})
13
15
  spec.require_paths = ["lib"]
14
16
 
15
- spec.add_dependency 'google-api-client'
17
+ # TODO
18
+ # signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
19
+ # Embulk 0.9 uses JRuby 9.1.X.Y, which is compatible with Ruby 2.3.
20
+ # So, force installing signet < 0.12 and google-api-client < 0.33.0
21
+ spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
22
+ spec.add_dependency 'google-api-client','< 0.33.0'
16
23
  spec.add_dependency 'time_with_zone'
17
24
 
18
25
  spec.add_development_dependency 'bundler', ['>= 1.10.6']
@@ -23,7 +23,7 @@ module Embulk
23
23
  # @return JSON string
24
24
  def self.load(v)
25
25
  if v.is_a?(String) # path
26
- File.read(v)
26
+ File.read(File.expand_path(v))
27
27
  elsif v.is_a?(Hash)
28
28
  v['content']
29
29
  end
@@ -33,9 +33,7 @@ module Embulk
33
33
  def self.configure(config, schema, task_count)
34
34
  task = {
35
35
  'mode' => config.param('mode', :string, :default => 'append'),
36
- 'auth_method' => config.param('auth_method', :string, :default => 'private_key'),
37
- 'service_account_email' => config.param('service_account_email', :string, :default => nil),
38
- 'p12_keyfile' => config.param('p12_keyfile', :string, :default => nil),
36
+ 'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
39
37
  'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
40
38
  'project' => config.param('project', :string, :default => nil),
41
39
  'dataset' => config.param('dataset', :string),
@@ -125,28 +123,21 @@ module Embulk
125
123
  end
126
124
 
127
125
  task['auth_method'] = task['auth_method'].downcase
128
- unless %w[private_key json_key compute_engine application_default].include?(task['auth_method'])
129
- raise ConfigError.new "`auth_method` must be one of private_key, json_key, compute_engine, application_default"
126
+ unless %w[json_key service_account authorized_user compute_engine application_default].include?(task['auth_method'])
127
+ raise ConfigError.new "`auth_method` must be one of service_account (or json_key), authorized_user, compute_engine, application_default"
130
128
  end
131
- if task['auth_method'] == 'private_key' and task['p12_keyfile'].nil?
132
- raise ConfigError.new "`p12_keyfile` is required for auth_method private_key"
133
- end
134
- if task['auth_method'] == 'json_key' and task['json_keyfile'].nil?
135
- raise ConfigError.new "`json_keyfile` is required for auth_method json_key"
129
+ if (task['auth_method'] == 'service_account' or task['auth_method'] == 'json_key') and task['json_keyfile'].nil?
130
+ raise ConfigError.new "`json_keyfile` is required for auth_method: service_account (or json_key)"
136
131
  end
137
132
 
138
- jsonkey_params = nil
139
133
  if task['json_keyfile']
140
134
  begin
141
- jsonkey_params = JSON.parse(task['json_keyfile'])
135
+ json_key = JSON.parse(task['json_keyfile'])
136
+ task['project'] ||= json_key['project_id']
142
137
  rescue => e
143
138
  raise ConfigError.new "json_keyfile is not a JSON file"
144
139
  end
145
140
  end
146
-
147
- if jsonkey_params
148
- task['project'] ||= jsonkey_params['project_id']
149
- end
150
141
  if task['project'].nil?
151
142
  raise ConfigError.new "Required field \"project\" is not set"
152
143
  end
@@ -313,14 +304,14 @@ module Embulk
313
304
  bigquery.create_table_if_not_exists(task['table'])
314
305
  when 'replace'
315
306
  bigquery.create_table_if_not_exists(task['temp_table'])
316
- bigquery.create_table_if_not_exists(task['table'])
307
+ bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
317
308
  when 'append'
318
309
  bigquery.create_table_if_not_exists(task['temp_table'])
319
- bigquery.create_table_if_not_exists(task['table'])
310
+ bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
320
311
  when 'replace_backup'
321
312
  bigquery.create_table_if_not_exists(task['temp_table'])
322
313
  bigquery.create_table_if_not_exists(task['table'])
323
- bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old'])
314
+ bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old']) # needs for when a partition
324
315
  else # append_direct
325
316
  if task['auto_create_table']
326
317
  bigquery.create_table_if_not_exists(task['table'])
@@ -0,0 +1,35 @@
1
+ require 'googleauth'
2
+
3
+ module Embulk
4
+ module Output
5
+ class Bigquery < OutputPlugin
6
+ class Auth
7
+
8
+ attr_reader :auth_method, :json_key, :scope
9
+
10
+ def initialize(task, scope)
11
+ @auth_method = task['auth_method']
12
+ @json_key = task['json_keyfile']
13
+ @scope = scope
14
+ end
15
+
16
+ def authenticate
17
+ case auth_method
18
+ when 'authorized_user'
19
+ key = StringIO.new(json_key)
20
+ return Google::Auth::UserRefreshCredentials.make_creds(json_key_io: key, scope: scope)
21
+ when 'compute_engine'
22
+ return Google::Auth::GCECredentials.new
23
+ when 'service_account', 'json_key' # json_key is for backward compatibility
24
+ key = StringIO.new(json_key)
25
+ return Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
26
+ when 'application_default'
27
+ return Google::Auth.get_application_default([scope])
28
+ else
29
+ raise ConfigError.new("Unknown auth method: #{auth_method}")
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -1,4 +1,4 @@
1
- require 'google/api_client/auth/key_utils'
1
+ require_relative 'auth'
2
2
 
3
3
  module Embulk
4
4
  module Output
@@ -14,6 +14,7 @@ module Embulk
14
14
  def initialize(task, scope, client_class)
15
15
  @task = task
16
16
  @scope = scope
17
+ @auth = Auth.new(task, scope)
17
18
  @client_class = client_class
18
19
  end
19
20
 
@@ -37,39 +38,7 @@ module Embulk
37
38
  Embulk.logger.debug { "embulk-output-bigquery: client_options: #{client.client_options.to_h}" }
38
39
  Embulk.logger.debug { "embulk-output-bigquery: request_options: #{client.request_options.to_h}" }
39
40
 
40
- case @task['auth_method']
41
- when 'private_key'
42
- private_key_passphrase = 'notasecret'
43
- key = Google::APIClient::KeyUtils.load_from_pkcs12(@task['p12_keyfile'], private_key_passphrase)
44
- auth = Signet::OAuth2::Client.new(
45
- token_credential_uri: "https://accounts.google.com/o/oauth2/token",
46
- audience: "https://accounts.google.com/o/oauth2/token",
47
- scope: @scope,
48
- issuer: @task['service_account_email'],
49
- signing_key: key)
50
-
51
- when 'compute_engine'
52
- auth = Google::Auth::GCECredentials.new
53
-
54
- when 'json_key'
55
- json_key = @task['json_keyfile']
56
- if File.exist?(json_key)
57
- auth = File.open(json_key) do |f|
58
- Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: @scope)
59
- end
60
- else
61
- key = StringIO.new(json_key)
62
- auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: @scope)
63
- end
64
-
65
- when 'application_default'
66
- auth = Google::Auth.get_application_default([@scope])
67
-
68
- else
69
- raise ConfigError, "Unknown auth method: #{@task['auth_method']}"
70
- end
71
-
72
- client.authorization = auth
41
+ client.authorization = @auth.authenticate
73
42
 
74
43
  @cached_client_expiration = Time.now + 1800
75
44
  @cached_client = client