embulk-output-bigquery 0.5.0 → 0.6.4

Files changed (58)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +29 -4
  3. data/README.md +71 -42
  4. data/embulk-output-bigquery.gemspec +10 -3
  5. data/lib/embulk/output/bigquery.rb +11 -20
  6. data/lib/embulk/output/bigquery/auth.rb +35 -0
  7. data/lib/embulk/output/bigquery/google_client.rb +3 -34
  8. data/lib/embulk/output/bigquery/value_converter_factory.rb +31 -0
  9. data/test/test_bigquery_client.rb +1 -5
  10. data/test/test_configure.rb +4 -12
  11. data/test/test_helper.rb +7 -1
  12. data/test/test_transaction.rb +5 -6
  13. data/test/test_value_converter_factory.rb +86 -0
  14. metadata +29 -51
  15. data/example/config_append_direct_schema_update_options.yml +0 -31
  16. data/example/config_client_options.yml +0 -33
  17. data/example/config_csv.yml +0 -30
  18. data/example/config_delete_in_advance.yml +0 -29
  19. data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
  20. data/example/config_delete_in_advance_partitioned_table.yml +0 -33
  21. data/example/config_expose_errors.yml +0 -30
  22. data/example/config_gcs.yml +0 -32
  23. data/example/config_guess_from_embulk_schema.yml +0 -29
  24. data/example/config_guess_with_column_options.yml +0 -40
  25. data/example/config_gzip.yml +0 -1
  26. data/example/config_jsonl.yml +0 -1
  27. data/example/config_max_threads.yml +0 -34
  28. data/example/config_min_ouput_tasks.yml +0 -34
  29. data/example/config_mode_append.yml +0 -30
  30. data/example/config_mode_append_direct.yml +0 -30
  31. data/example/config_nested_record.yml +0 -1
  32. data/example/config_payload_column.yml +0 -20
  33. data/example/config_payload_column_index.yml +0 -20
  34. data/example/config_progress_log_interval.yml +0 -31
  35. data/example/config_replace.yml +0 -30
  36. data/example/config_replace_backup.yml +0 -32
  37. data/example/config_replace_backup_field_partitioned_table.yml +0 -34
  38. data/example/config_replace_backup_partitioned_table.yml +0 -34
  39. data/example/config_replace_field_partitioned_table.yml +0 -33
  40. data/example/config_replace_partitioned_table.yml +0 -33
  41. data/example/config_replace_schema_update_options.yml +0 -33
  42. data/example/config_skip_file_generation.yml +0 -32
  43. data/example/config_table_strftime.yml +0 -30
  44. data/example/config_template_table.yml +0 -21
  45. data/example/config_uncompressed.yml +0 -1
  46. data/example/config_with_rehearsal.yml +0 -33
  47. data/example/example.csv +0 -17
  48. data/example/example.yml +0 -1
  49. data/example/example2_1.csv +0 -1
  50. data/example/example2_2.csv +0 -1
  51. data/example/example4_1.csv +0 -1
  52. data/example/example4_2.csv +0 -1
  53. data/example/example4_3.csv +0 -1
  54. data/example/example4_4.csv +0 -1
  55. data/example/json_key.json +0 -12
  56. data/example/nested_example.jsonl +0 -16
  57. data/example/schema.json +0 -30
  58. data/example/schema_expose_errors.json +0 -30
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA256:
-  metadata.gz: 3e0087103039718cb24224b6bb793d820b53b935194d412e4b2984aba3d7d7a8
-  data.tar.gz: 9ac27a3b881277450cbfaa096de0690c721a8f86f0e78abb692c8a4ed5b679d5
+SHA1:
+  metadata.gz: 7ea0cf04e91a092e3d97bf3e46d1c181aab4943f
+  data.tar.gz: 60e970acbc16128189df8c274a832d23160e4c80
 SHA512:
-  metadata.gz: 6b0ccf4e349a5d15321cfcc97138a98676bddfd412fd6fadfc8b1e0d6cd31d9739a8a5f46ccd923644543ae43cc0134b3e7598f80d89c330a4ac8aec49c084c1
-  data.tar.gz: f02557cdd7956620ae59eb6bc0e5872992d20a65881bd69230b0b0442342a36203d1eedd8a20702d2000f412b909359657bfa300b3e82b5f494398ea6e5ea301
+  metadata.gz: 14fb288ad9781515a28cf72869ca7e76081202b8cf7e29d2448bb9ec37e6a8e8e73a046930f53c41785551b354a990ccc8ee2f9298b418de672c6dfaa2e6447b
+  data.tar.gz: 9f8f59c89cf7cc9974ab8a287ffbf522263ff5e81ff988de95f22c9086cf805d39d8f9c7c7d41310357464a29e5ec0404fe819b5ba41de36eb0d870c4ca31144
data/CHANGELOG.md CHANGED
@@ -1,9 +1,34 @@
+## 0.6.4 - 2019-11-06
+
+* [enhancement] Add DATETIME type converter (thanks to @kekekenta)
+
+## 0.6.3 - 2019-10-28
+
+* [enhancement] Add DATE type converter (thanks to @tksfjt1024)
+
+## 0.6.2 - 2019-10-16
+
+* [maintenance] Lock signet and google-api-client versions (thanks to @hiroyuki-sato)
+
+## 0.6.1 - 2019-08-28
+
+* [maintenance] Release a new gem that does not include symlinks so that it works on Windows.
+
+## 0.6.0 - 2019-08-11
+
+Cleanup `auth_method`:
+
+* [enhancement] Support `auth_method: authorized_user` (OAuth)
+* [incompatibility change] Rename `auth_method: json_key` to `auth_method: service_account` (`json_key` is kept for backward compatibility)
+* [incompatibility change] Remove deprecated `auth_method: private_key` (p12 key)
+* [incompatibility change] Change the default `auth_method` from `private_key` to `application_default` because `private_key` was dropped.
+
 ## 0.5.0 - 2019-08-10
 
-* [incompatibility change] Drop deprecated time\_partitioning.require\_partition\_filter
-* [incompatibility change] Drop prevent\_duplicate\_insert which has no use-case now
-* [incompatibility change] Change default value of `auto\_create\_table` to `true` from `false`
-* Modes `replace`, `replace_backup`, `append`, `delete_in_advance`, that is, except `append_direct` requires `auto_create_table: true`.
+* [incompatibility change] Drop deprecated `time_partitioning`.`require_partition_filter`
+* [incompatibility change] Drop `prevent_duplicate_insert`, which has no use case now
+* [incompatibility change] Modes `replace`, `replace_backup`, `append`, and `delete_in_advance` now require `auto_create_table: true` because these modes previously created the target table even with `auto_create_table: false`, which confused users. Note that `auto_create_table: true` is required even for a partition (a table name with a partition decorator), which may not require creating a table. This keeps the logic and implementation simple.
+* [incompatibility change] Change the default value of `auto_create_table` to `true` because the above four modes, that is, all modes except `append_direct`, now always require `auto_create_table: true`.
 
 ## 0.4.14 - 2019-08-10
 
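In config terms, the 0.6.0 `auth_method` cleanup boils down to a one-line rename. A minimal before/after sketch, assuming a service account key file (the path is illustrative):

```yaml
# before 0.6.0
out:
  type: bigquery
  auth_method: json_key            # old name, still accepted for backward compatibility
  json_keyfile: /path/to/json_keyfile.json

# 0.6.0 and later
out:
  type: bigquery
  auth_method: service_account     # new canonical name
  json_keyfile: /path/to/json_keyfile.json
```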
data/README.md CHANGED
@@ -29,17 +29,15 @@ OAuth flow for installed applications.
 
 | name                        | type    | required? | default                | description |
 |:----------------------------|:--------|:----------|:-----------------------|:------------|
-| mode                        | string  | optional  | "append"               | See [Mode](#mode) |
-| auth_method                 | string  | optional  | "private_key"          | `private_key` , `json_key` or `compute_engine` |
-| service_account_email       | string  | required when auth_method is private_key | | Your Google service account email |
-| p12_keyfile                 | string  | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
-| json_keyfile                | string  | required when auth_method is json_key | | Fullpath of json key |
-| project                     | string  | required if json_keyfile is not given | | project_id |
+| mode                        | string  | optional  | "append"               | See [Mode](#mode) |
+| auth_method                 | string  | optional  | "application\_default" | See [Authentication](#authentication) |
+| json_keyfile                | string  | optional  |                        | keyfile path or `content` |
+| project                     | string  | required unless service\_account's `json_keyfile` is given | | project\_id |
 | dataset                     | string  | required  |                        | dataset |
 | location                    | string  | optional  | nil                    | geographic location of dataset. See [Location](#location) |
 | table                       | string  | required  |                        | table name, or table name with a partition decorator such as `table_name$20160929` |
 | auto_create_dataset         | boolean | optional  | false                  | automatically create dataset |
-| auto_create_table           | boolean | optional  | true                   | `false` is available only for `append_direct` mode. Other modes requires `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
+| auto_create_table           | boolean | optional  | true                   | `false` is available only for `append_direct` mode. Other modes require `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
 | schema_file                 | string  | optional  |                        | /path/to/schema.json |
 | template_table              | string  | optional  |                        | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
 | job_status_max_polling_time | int     | optional  | 3600 sec               | Max job status polling time |
@@ -108,9 +106,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 out:
   type: bigquery
   mode: append
-  auth_method: private_key # default
-  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
-  p12_keyfile: /path/to/p12_keyfile.p12
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
   project: your-project-000
   dataset: your_dataset_name
   table: your_table_name
@@ -118,7 +115,7 @@ out:
   source_format: NEWLINE_DELIMITED_JSON
 ```
 
-### location
+### Location
 
 The geographic location of the dataset. Required except for US and EU.
 
@@ -126,7 +123,7 @@ GCS bucket should be in same region when you use `gcs_bucket`.
 
 See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
 
-### mode
+### Mode
 
 5 modes are provided.
 
@@ -165,53 +162,69 @@ NOTE: BigQuery does not support replacing (actually, copying into) a non-partiti
 
 ### Authentication
 
-There are three methods supported to fetch access token for the service account.
+There are four authentication methods:
+
+1. `service_account` (or `json_key` for backward compatibility)
+1. `authorized_user`
+1. `compute_engine`
+1. `application_default`
 
-1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
-2. JSON key of GCP(Google Cloud Platform)'s service account
-3. Pre-defined access token (Google Compute Engine only)
+#### service\_account (or json\_key)
 
-#### Public-Private key pair of GCP's service account
+Use GCP service account credentials.
+You first need to create a service account, download its json key, and deploy the key with embulk.
+
+```yaml
+out:
+  type: bigquery
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
+```
 
-You first need to create a service account (client ID),
-download its private key and deploy the key with embulk.
+You can also embed the contents of `json_keyfile` in config.yml.
 
 ```yaml
 out:
   type: bigquery
-  auth_method: private_key # default
-  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
-  p12_keyfile: /path/to/p12_keyfile.p12
+  auth_method: service_account
+  json_keyfile:
+    content: |
+      {
+          "private_key_id": "123456789",
+          "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
+          "client_email": "..."
+      }
 ```
 
-#### JSON key of GCP's service account
+#### authorized\_user
 
-You first need to create a service account (client ID),
-download its json key and deploy the key with embulk.
+Use Google user credentials.
+You can get your credentials at `~/.config/gcloud/application_default_credentials.json` by running `gcloud auth login`.
 
 ```yaml
 out:
   type: bigquery
-  auth_method: json_key
-  json_keyfile: /path/to/json_keyfile.json
+  auth_method: authorized_user
+  json_keyfile: /path/to/credentials.json
 ```
 
-You can also embed contents of json_keyfile at config.yml.
+You can also embed the contents of `json_keyfile` in config.yml.
 
 ```yaml
 out:
   type: bigquery
-  auth_method: json_key
+  auth_method: authorized_user
   json_keyfile:
     content: |
       {
-          "private_key_id": "123456789",
-          "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
-          "client_email": "..."
-      }
+          "client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
+          "client_secret":"xxxxxxxxxxx",
+          "refresh_token":"xxxxxxxxxxx",
+          "type":"authorized_user"
+      }
 ```
 
-#### Pre-defined access token(GCE only)
+#### compute\_engine
 
 On the other hand, you don't need to explicitly create a service account for embulk when you
 run embulk in Google Compute Engine. In this third authentication method, you need to
@@ -224,6 +237,22 @@ out:
   auth_method: compute_engine
 ```
 
+#### application\_default
+
+Use Application Default Credentials (ADC). ADC is a strategy to locate Google Cloud service account credentials.
+
+1. ADC checks whether the environment variable `GOOGLE_APPLICATION_CREDENTIALS` is set. If it is, ADC uses the service account file that the variable points to.
+2. ADC checks whether `~/.config/gcloud/application_default_credentials.json` exists. This file is created by running `gcloud auth application-default login`.
+3. Otherwise, ADC uses the default service account if the application runs on Compute Engine, App Engine, Kubernetes Engine, Cloud Functions, or Cloud Run.
+
+See https://cloud.google.com/docs/authentication/production for details.
+
+```yaml
+out:
+  type: bigquery
+  auth_method: application_default
+```
+
 ### Table id formatting
 
 `table` and option accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
@@ -232,12 +261,12 @@ Table ids are formatted at runtime
 using the local time of the embulk server.
 
 For example, with the configuration below,
-data is inserted into tables `table_2015_04`, `table_2015_05` and so on.
+data is inserted into tables `table_20150503`, `table_20150504` and so on.
 
 ```yaml
 out:
   type: bigquery
-  table: table_%Y_%m
+  table: table_%Y%m%d
 ```
 
 ### Dynamic table creating
@@ -252,7 +281,7 @@ Please set file path of schema.json.
 out:
   type: bigquery
   auto_create_table: true
-  table: table_%Y_%m
+  table: table_%Y%m%d
   schema_file: /path/to/schema.json
 ```
 
@@ -264,7 +293,7 @@ Plugin will try to read schema from existing table and use it as schema template
 out:
   type: bigquery
   auto_create_table: true
-  table: table_%Y_%m
+  table: table_%Y%m%d
   template_table: existing_table_name
 ```
 
@@ -278,17 +307,17 @@ Column options are used to aid guessing BigQuery schema, or to define conversion
 
 - **column_options**: advanced: an array of options for columns
   - **name**: column name
-  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, and `RECORD`. See belows for supported conversion type.
+  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, and `RECORD`. See below for supported conversion types.
     - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
     - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
     - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
-    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `RECORD` (default: `STRING`)
-    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `TIMESTAMP`)
+    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE`, `RECORD` (default: `STRING`)
+    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATETIME`, `DATE` (default: `TIMESTAMP`)
     - json: `STRING`, `RECORD` (default: `STRING`)
   - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
   - **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
   - **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
-  - **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`).
+  - **timezone**: timezone to convert into/from `timestamp` and `date` (string, default is `default_timezone`).
 - **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
 - **default_timezone**: default timezone for column_options (string, default is "UTC")
 
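As a sketch of what the new `DATE` and `DATETIME` converters enable, a hypothetical `column_options` entry converting a string column and a timestamp column (column names are illustrative):

```yaml
out:
  type: bigquery
  column_options:
    - {name: registered_on, type: DATE, timezone: 'Asia/Tokyo'}
    - {name: updated_at,    type: DATETIME}
```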
data/embulk-output-bigquery.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.5.0"
+  spec.version = "0.6.4"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -8,11 +8,18 @@ Gem::Specification.new do |spec|
   spec.licenses = ["MIT"]
   spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
 
-  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
+  # Exclude the example directory, which uses symlinks, from the generated gem.
+  # Symlinks do not work properly on Windows without administrator privileges.
+  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] - Dir["example/*"]
   spec.test_files = spec.files.grep(%r{^(test|spec)/})
   spec.require_paths = ["lib"]
 
-  spec.add_dependency 'google-api-client'
+  # TODO
+  # signet 0.12.0 and google-api-client 0.33.0 require Ruby >= 2.4.
+  # Embulk 0.9 uses JRuby 9.1.X.Y, which is compatible with Ruby 2.3.
+  # So, force signet < 0.12 and google-api-client < 0.33.0.
+  spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
+  spec.add_dependency 'google-api-client', '< 0.33.0'
   spec.add_dependency 'time_with_zone'
 
   spec.add_development_dependency 'bundler', ['>= 1.10.6']
data/lib/embulk/output/bigquery.rb CHANGED
@@ -23,7 +23,7 @@ module Embulk
       # @return JSON string
       def self.load(v)
         if v.is_a?(String) # path
-          File.read(v)
+          File.read(File.expand_path(v))
         elsif v.is_a?(Hash)
           v['content']
         end
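The `File.expand_path` change above means a `json_keyfile` path is now expanded before reading, so a `~`-relative path should work. A sketch, assuming an illustrative key location:

```yaml
out:
  type: bigquery
  auth_method: service_account
  json_keyfile: ~/secrets/json_keyfile.json   # tilde now expands to the home directory
```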
@@ -33,9 +33,7 @@ module Embulk
       def self.configure(config, schema, task_count)
         task = {
           'mode' => config.param('mode', :string, :default => 'append'),
-          'auth_method' => config.param('auth_method', :string, :default => 'private_key'),
-          'service_account_email' => config.param('service_account_email', :string, :default => nil),
-          'p12_keyfile' => config.param('p12_keyfile', :string, :default => nil),
+          'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
           'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
           'project' => config.param('project', :string, :default => nil),
           'dataset' => config.param('dataset', :string),
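Because the default `auth_method` is now `application_default`, a config that omits `auth_method` entirely authenticates via ADC. A minimal sketch (project and table names are illustrative):

```yaml
out:
  type: bigquery
  # auth_method: application_default is implied
  project: your-project-000
  dataset: your_dataset_name
  table: your_table_name
```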
@@ -125,28 +123,21 @@ module Embulk
         end
 
         task['auth_method'] = task['auth_method'].downcase
-        unless %w[private_key json_key compute_engine application_default].include?(task['auth_method'])
-          raise ConfigError.new "`auth_method` must be one of private_key, json_key, compute_engine, application_default"
+        unless %w[json_key service_account authorized_user compute_engine application_default].include?(task['auth_method'])
+          raise ConfigError.new "`auth_method` must be one of service_account (or json_key), authorized_user, compute_engine, application_default"
         end
-        if task['auth_method'] == 'private_key' and task['p12_keyfile'].nil?
-          raise ConfigError.new "`p12_keyfile` is required for auth_method private_key"
-        end
-        if task['auth_method'] == 'json_key' and task['json_keyfile'].nil?
-          raise ConfigError.new "`json_keyfile` is required for auth_method json_key"
+        if (task['auth_method'] == 'service_account' or task['auth_method'] == 'json_key') and task['json_keyfile'].nil?
+          raise ConfigError.new "`json_keyfile` is required for auth_method: service_account (or json_key)"
         end
 
-        jsonkey_params = nil
         if task['json_keyfile']
           begin
-            jsonkey_params = JSON.parse(task['json_keyfile'])
+            json_key = JSON.parse(task['json_keyfile'])
+            task['project'] ||= json_key['project_id']
           rescue => e
             raise ConfigError.new "json_keyfile is not a JSON file"
           end
         end
-
-        if jsonkey_params
-          task['project'] ||= jsonkey_params['project_id']
-        end
         if task['project'].nil?
           raise ConfigError.new "Required field \"project\" is not set"
         end
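Per the parsing above, `project` can be omitted when the service account key carries a `project_id`; the plugin takes it from the key. A sketch with illustrative key contents:

```yaml
out:
  type: bigquery
  auth_method: service_account
  json_keyfile:
    content: |
      {
        "project_id": "your-project-000",
        "private_key_id": "123456789",
        "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
        "client_email": "..."
      }
  # project is taken from the key's project_id
  dataset: your_dataset_name
  table: your_table_name
```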
@@ -313,14 +304,14 @@ module Embulk
             bigquery.create_table_if_not_exists(task['table'])
           when 'replace'
             bigquery.create_table_if_not_exists(task['temp_table'])
-            bigquery.create_table_if_not_exists(task['table'])
+            bigquery.create_table_if_not_exists(task['table']) # needed when task['table'] is a partition
           when 'append'
             bigquery.create_table_if_not_exists(task['temp_table'])
-            bigquery.create_table_if_not_exists(task['table'])
+            bigquery.create_table_if_not_exists(task['table']) # needed when task['table'] is a partition
           when 'replace_backup'
             bigquery.create_table_if_not_exists(task['temp_table'])
             bigquery.create_table_if_not_exists(task['table'])
-            bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old'])
+            bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old']) # needed when a partition
           else # append_direct
             if task['auto_create_table']
               bigquery.create_table_if_not_exists(task['table'])
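The comments above cover the case where `table` names a single partition. A sketch of a `replace` into one partition, which still requires `auto_create_table: true` (table name is illustrative):

```yaml
out:
  type: bigquery
  mode: replace
  auto_create_table: true      # required for all modes except append_direct
  table: table_name$20160929   # partition decorator
```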
data/lib/embulk/output/bigquery/auth.rb ADDED
@@ -0,0 +1,35 @@
+require 'googleauth'
+
+module Embulk
+  module Output
+    class Bigquery < OutputPlugin
+      class Auth
+
+        attr_reader :auth_method, :json_key, :scope
+
+        def initialize(task, scope)
+          @auth_method = task['auth_method']
+          @json_key = task['json_keyfile']
+          @scope = scope
+        end
+
+        def authenticate
+          case auth_method
+          when 'authorized_user'
+            key = StringIO.new(json_key)
+            return Google::Auth::UserRefreshCredentials.make_creds(json_key_io: key, scope: scope)
+          when 'compute_engine'
+            return Google::Auth::GCECredentials.new
+          when 'service_account', 'json_key' # json_key is for backward compatibility
+            key = StringIO.new(json_key)
+            return Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
+          when 'application_default'
+            return Google::Auth.get_application_default([scope])
+          else
+            raise ConfigError.new("Unknown auth method: #{auth_method}")
+          end
+        end
+      end
+    end
+  end
+end
data/lib/embulk/output/bigquery/google_client.rb CHANGED
@@ -1,4 +1,4 @@
-require 'google/api_client/auth/key_utils'
+require_relative 'auth'
 
 module Embulk
   module Output
@@ -14,6 +14,7 @@ module Embulk
       def initialize(task, scope, client_class)
        @task = task
        @scope = scope
+       @auth = Auth.new(task, scope)
        @client_class = client_class
       end
 
@@ -37,39 +38,7 @@ module Embulk
        Embulk.logger.debug { "embulk-output-bigquery: client_options: #{client.client_options.to_h}" }
        Embulk.logger.debug { "embulk-output-bigquery: request_options: #{client.request_options.to_h}" }
 
-        case @task['auth_method']
-        when 'private_key'
-          private_key_passphrase = 'notasecret'
-          key = Google::APIClient::KeyUtils.load_from_pkcs12(@task['p12_keyfile'], private_key_passphrase)
-          auth = Signet::OAuth2::Client.new(
-            token_credential_uri: "https://accounts.google.com/o/oauth2/token",
-            audience: "https://accounts.google.com/o/oauth2/token",
-            scope: @scope,
-            issuer: @task['service_account_email'],
-            signing_key: key)
-
-        when 'compute_engine'
-          auth = Google::Auth::GCECredentials.new
-
-        when 'json_key'
-          json_key = @task['json_keyfile']
-          if File.exist?(json_key)
-            auth = File.open(json_key) do |f|
-              Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: @scope)
-            end
-          else
-            key = StringIO.new(json_key)
-            auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: @scope)
-          end
-
-        when 'application_default'
-          auth = Google::Auth.get_application_default([@scope])
-
-        else
-          raise ConfigError, "Unknown auth method: #{@task['auth_method']}"
-        end
-
-        client.authorization = auth
+        client.authorization = @auth.authenticate
 
        @cached_client_expiration = Time.now + 1800
        @cached_client = client