embulk-output-bigquery 0.4.13 → 0.6.2
- checksums.yaml +4 -4
- data/.travis.yml +10 -6
- data/CHANGELOG.md +28 -0
- data/Gemfile +2 -0
- data/README.md +107 -75
- data/embulk-output-bigquery.gemspec +10 -3
- data/lib/embulk/output/bigquery.rb +31 -50
- data/lib/embulk/output/bigquery/auth.rb +35 -0
- data/lib/embulk/output/bigquery/bigquery_client.rb +24 -31
- data/lib/embulk/output/bigquery/google_client.rb +3 -34
- data/lib/embulk/output/bigquery/helper.rb +8 -4
- data/test/helper.rb +2 -1
- data/test/test_bigquery_client.rb +17 -21
- data/test/test_configure.rb +10 -19
- data/test/test_example.rb +5 -4
- data/test/test_transaction.rb +36 -76
- metadata +27 -49
- data/example/config_append_direct_schema_update_options.yml +0 -31
- data/example/config_client_options.yml +0 -33
- data/example/config_csv.yml +0 -30
- data/example/config_delete_in_advance.yml +0 -29
- data/example/config_delete_in_advance_partitioned_table.yml +0 -33
- data/example/config_expose_errors.yml +0 -30
- data/example/config_gcs.yml +0 -32
- data/example/config_guess_from_embulk_schema.yml +0 -29
- data/example/config_guess_with_column_options.yml +0 -40
- data/example/config_gzip.yml +0 -1
- data/example/config_jsonl.yml +0 -1
- data/example/config_max_threads.yml +0 -34
- data/example/config_min_ouput_tasks.yml +0 -34
- data/example/config_mode_append.yml +0 -30
- data/example/config_mode_append_direct.yml +0 -30
- data/example/config_nested_record.yml +0 -1
- data/example/config_payload_column.yml +0 -20
- data/example/config_payload_column_index.yml +0 -20
- data/example/config_prevent_duplicate_insert.yml +0 -30
- data/example/config_progress_log_interval.yml +0 -31
- data/example/config_replace.yml +0 -30
- data/example/config_replace_backup.yml +0 -32
- data/example/config_replace_backup_paritioned_table.yml +0 -34
- data/example/config_replace_paritioned_table.yml +0 -33
- data/example/config_replace_schema_update_options.yml +0 -33
- data/example/config_skip_file_generation.yml +0 -32
- data/example/config_table_strftime.yml +0 -30
- data/example/config_template_table.yml +0 -21
- data/example/config_uncompressed.yml +0 -1
- data/example/config_with_rehearsal.yml +0 -33
- data/example/example.csv +0 -17
- data/example/example.jsonl +0 -16
- data/example/example.yml +0 -1
- data/example/example2_1.csv +0 -1
- data/example/example2_2.csv +0 -1
- data/example/example4_1.csv +0 -1
- data/example/example4_2.csv +0 -1
- data/example/example4_3.csv +0 -1
- data/example/example4_4.csv +0 -1
- data/example/json_key.json +0 -12
- data/example/nested_example.jsonl +0 -16
- data/example/schema.json +0 -30
- data/example/schema_expose_errors.json +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2168730943154d9fb8d8ebfce9e4a1c2130b16b5
+  data.tar.gz: 8c4549b91f75d3e7a874f310e0df791bd9c28030
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ae7e67855daddce745e3d9a8c5ee659d99c7e84510365bf216f63fb7ed92b9ce166cb2b005b76680020f67354a6f97cb3596d9b15490630cf1b927968e5b8f0e
+  data.tar.gz: 83531a34355dbf3af0ec602db9318ad52e0b6b9ba84b5ea1bef15f4a50b67fa69b664e0bf62deffe8f37ac62d9b995a6c5a135b69e5cf365df06d97f32264cfa
data/.travis.yml
CHANGED
@@ -1,17 +1,21 @@
 language: ruby
 matrix:
   include:
-    - env: EMBULK_VERSION=0.8.39
-      rvm: jruby-9.1.5.0 # bundled jruby version
-      jdk: openjdk7 # embulk 0.8.x uses jdk7
     - env: EMBULK_VERSION=0.9.15
-      rvm: jruby-9.1.
+      rvm: jruby-9.1.15.0 # bundled jruby version
       jdk: openjdk8 # embulk 0.9.x uses jdk8
     - env: EMBULK_VERSION=latest
-      rvm: jruby-9.1.
+      rvm: jruby-9.1.15.0 # ?
       jdk: openjdk8 # ?
   allow_failures:
     - env: EMBULK_VERSION=latest
 before_install:
   - curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-${EMBULK_VERSION}.jar"
-
+  - chmod a+x embulk.jar
+  - BUNDLER_VERSION=$(echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb | tail -n 2 | tr -d '"')
+  - gem uninstall bundler -x
+  - gem install bundler -v ${BUNDLER_VERSION}
+install:
+  - ./embulk.jar bundle install --jobs=3 --retry=3 --path vendor/bundle
+script:
+  - bundle exec env RUBYOPT="-r ./embulk.jar -r embulk -r embulk/java/bootstrap" rake test
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,31 @@
+## 0.6.2 - 2019-10-16
+
+* [maintenance] Lock signet and google-api-client versions (thanks to @hiroyuki-sato)
+
+## 0.6.1 - 2019-08-28
+
+* [maintenance] Release a new gem that does not include symlinks, so that it works on Windows.
+
+## 0.6.0 - 2019-08-11
+
+Cleanup `auth_method`:
+
+* [enhancement] Support `auth_method: authorized_user` (OAuth)
+* [incompatibility change] Rename `auth_method: json_key` to `auth_method: service_account` (`json_key` is kept for backward compatibility)
+* [incompatibility change] Remove deprecated `auth_method: private_key` (p12 key)
+* [incompatibility change] Change the default `auth_method` to `application_default` from `private_key` because `private_key` was dropped.
+
+## 0.5.0 - 2019-08-10
+
+* [incompatibility change] Drop deprecated `time_partitioning`.`require_partition_filter`
+* [incompatibility change] Drop `prevent_duplicate_insert`, which has no use-case now
+* [incompatibility change] Modes `replace`, `replace_backup`, `append`, and `delete_in_advance` now require `auto_create_table: true` because, previously, these modes created a target table even with `auto_create_table: false`, which confused users. Note that `auto_create_table: true` is always required even for a partition (a table name with a partition decorator), which may not require creating a table. This keeps the logic and implementation simple.
+* [incompatibility change] Change the default value of `auto_create_table` to `true` because the above four modes, that is, all modes except `append_direct`, now always require `auto_create_table: true`.
+
+## 0.4.14 - 2019-08-10
+
+* [enhancement] Support field partitioning correctly.
+
 ## 0.4.13 - 2019-03-20
 
 * [enhancement] Support clustered table as an experimental feature
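To illustrate the 0.6.0 `auth_method` cleanup above, here is a minimal sketch of how a config that used the old `json_key` name would be written now (the keyfile path is a placeholder; `json_key` itself still works for backward compatibility):

```yaml
out:
  type: bigquery
  # formerly: auth_method: json_key
  auth_method: service_account
  json_keyfile: /path/to/json_keyfile.json
```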
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -23,34 +23,23 @@ https://developers.google.com/bigquery/loading-data-into-bigquery
 Current version of this plugin supports Google API with Service Account Authentication, but does not support
 OAuth flow for installed applications.
 
-### INCOMPATIBILITY CHANGES
-
-v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGELOG.md) for details.
-
-* `formatter` option (formatter plugin support) is dropped. Use `source_format` option instead. (it already exists in v0.2.x too)
-* `encoders` option (encoder plugin support) is dropped. Use `compression` option instead (it already exists in v0.2.x too).
-* `mode: append` mode now expresses a transactional append, and `mode: append_direct` is one which is not transactional.
-
 ## Configuration
 
 #### Original options
 
 | name | type | required? | default | description |
 |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
-| mode | string | optional | "append" | See [Mode](#mode)
-| auth_method | string | optional | "
-|
-|
-| json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
-| project | string | required if json_keyfile is not given | | project_id |
+| mode | string | optional | "append" | See [Mode](#mode) |
+| auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
+| json_keyfile | string | optional | | keyfile path or `content` |
+| project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
 | dataset | string | required | | dataset |
 | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
 | auto_create_dataset | boolean | optional | false | automatically create dataset |
-| auto_create_table | boolean | optional |
+| auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes require `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
 | schema_file | string | optional | | /path/to/schema.json |
 | template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
-| prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
 | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
 | job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
 | is_skip_job_result_check | boolean | optional | false | Skip waiting until the Load job finishes. Available for append, or delete_in_advance mode |
@@ -107,8 +96,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 | time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
 | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
 | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
-|
-| clustering | hash | optional | nil | (Experimental) Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. NOTE: **clustered tables** is a beta release. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
+| clustering | hash | optional | nil | Currently, clustering is supported for partitioned tables, so it must be used with the `time_partitioning` option. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
 | clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
 | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for the `copy` job, that is, it is not effective for most modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes the origin table, so it does not need to update the schema. Only `append_direct` can utilize schema update. |
 
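Since, per the note above, only `append_direct` can utilize schema update, a minimal sketch of a config using `schema_update_options` might look like this (dataset and table names are placeholders):

```yaml
out:
  type: bigquery
  mode: append_direct
  dataset: your_dataset_name
  table: your_table_name
  schema_update_options:
    - ALLOW_FIELD_ADDITION
    - ALLOW_FIELD_RELAXATION
```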
@@ -118,9 +106,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 out:
   type: bigquery
   mode: append
-  auth_method:
-
-  p12_keyfile: /path/to/p12_keyfile.p12
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
   project: your-project-000
   dataset: your_dataset_name
   table: your_table_name
@@ -128,7 +115,7 @@ out:
   source_format: NEWLINE_DELIMITED_JSON
 ```
 
-###
+### Location
 
 The geographic location of the dataset. Required except for US and EU.
 
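For example, a dataset in the Tokyo region would be configured as below; `asia-northeast1` is only an illustrative value, any [BigQuery dataset location](https://cloud.google.com/bigquery/docs/dataset-locations) works:

```yaml
out:
  type: bigquery
  location: asia-northeast1
```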
@@ -136,7 +123,7 @@ GCS bucket should be in same region when you use `gcs_bucket`.
 
 See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
 
-###
+### Mode
 
 5 modes are provided.
 
@@ -158,6 +145,8 @@ This is not transactional, i.e., if fails, the target table could have some rows
 
 ```is_skip_job_result_check``` must be false when replace mode
 
+NOTE: BigQuery does not support atomically replacing (actually, copying into) a non-partitioned table with a partitioned table. You must delete the non-partitioned table first; otherwise, you get an `Incompatible table partitioning specification when copying to the column partitioned table` error.
+
 ##### replace_backup
 
 1. Load to temporary table (Create and WRITE_APPEND in parallel)
@@ -173,53 +162,69 @@ This is not transactional, i.e., if fails, the target table could have some rows
 
 ### Authentication
 
-There are
+There are four authentication methods:
 
-1.
-
-
+1. `service_account` (or `json_key` for backward compatibility)
+1. `authorized_user`
+1. `compute_engine`
+1. `application_default`
 
-####
+#### service\_account (or json\_key)
 
-
-download its
+Use GCP service account credentials.
+You first need to create a service account, download its json key and deploy the key with embulk.
 
 ```yaml
 out:
   type: bigquery
-  auth_method:
-
-  p12_keyfile: /path/to/p12_keyfile.p12
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
 ```
 
-
+You can also embed contents of `json_keyfile` at config.yml.
 
-
-
+```yaml
+out:
+  type: bigquery
+  auth_method: service_account
+  json_keyfile:
+    content: |
+      {
+          "private_key_id": "123456789",
+          "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
+          "client_email": "..."
+      }
+```
+
+#### authorized\_user
+
+Use Google user credentials.
+You can get your credentials at `~/.config/gcloud/application_default_credentials.json` by running `gcloud auth login`.
 
 ```yaml
 out:
   type: bigquery
-  auth_method:
-  json_keyfile: /path/to/
+  auth_method: authorized_user
+  json_keyfile: /path/to/credentials.json
 ```
 
-You can also embed contents of json_keyfile at config.yml.
+You can also embed contents of `json_keyfile` at config.yml.
 
 ```yaml
 out:
   type: bigquery
-  auth_method:
+  auth_method: authorized_user
   json_keyfile:
     content: |
       {
-
-
-
-
+          "client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
+          "client_secret":"xxxxxxxxxxx",
+          "refresh_token":"xxxxxxxxxxx",
+          "type":"authorized_user"
+      }
 ```
 
-####
+#### compute\_engine
 
 On the other hand, you don't need to explicitly create a service account for embulk when you
 run embulk in Google Compute Engine. In this third authentication method, you need to
@@ -232,6 +237,22 @@ out:
   auth_method: compute_engine
 ```
 
+#### application\_default
+
+Use Application Default Credentials (ADC). ADC is a strategy to locate Google Cloud Service Account credentials.
+
+1. ADC checks to see if the environment variable `GOOGLE_APPLICATION_CREDENTIALS` is set. If the variable is set, ADC uses the service account file that the variable points to.
+2. ADC checks to see if `~/.config/gcloud/application_default_credentials.json` is located. This file is created by running `gcloud auth application-default login`.
+3. ADC uses the default service account for credentials if the application is running on Compute Engine, App Engine, Kubernetes Engine, Cloud Functions or Cloud Run.
+
+See https://cloud.google.com/docs/authentication/production for details.
+
+```yaml
+out:
+  type: bigquery
+  auth_method: application_default
+```
+
 ### Table id formatting
 
 The `table` option accepts [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
@@ -240,20 +261,16 @@ Table ids are formatted at runtime
 using the local time of the embulk server.
 
 For example, with the configuration below,
-data is inserted into tables `
+data is inserted into tables `table_20150503`, `table_20150504` and so on.
 
 ```yaml
 out:
   type: bigquery
-  table: table_%
+  table: table_%Y%m%d
 ```
 
 ### Dynamic table creating
 
-When `auto_create_table` is set to true, try to create the table using BigQuery API.
-
-If table already exists, insert into it.
-
 There are 3 ways to set schema.
 
 #### Set schema.json
@@ -264,7 +281,7 @@ Please set file path of schema.json.
 out:
   type: bigquery
   auto_create_table: true
-  table: table_%
+  table: table_%Y%m%d
   schema_file: /path/to/schema.json
 ```
 
@@ -276,7 +293,7 @@ Plugin will try to read schema from existing table and use it as schema template
 out:
   type: bigquery
   auto_create_table: true
-  table: table_%
+  table: table_%Y%m%d
   template_table: existing_table_name
 ```
 
@@ -352,25 +369,9 @@ out:
   payload_column_index: 0 # or, payload_column: payload
 ```
 
-### Prevent Duplication
-
-`prevent_duplicate_insert` option is used to prevent inserting same data for modes `append` or `append_direct`.
-
-When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generate job ID from md5 hash of file and other options.
-
-`job ID = md5(md5(file) + dataset + table + schema + source_format + file_delimiter + max_bad_records + encoding + ignore_unknown_values + allow_quoted_newlines)`
-
-[job ID must be unique(including failures)](https://cloud.google.com/bigquery/loading-data-into-bigquery#consistency) so that same data can't be inserted with same settings repeatedly.
-
-```yaml
-out:
-  type: bigquery
-  prevent_duplicate_insert: true
-```
-
 ### GCS Bucket
 
-This is useful to reduce number of consumed jobs, which is limited by [
+This is useful to reduce the number of consumed jobs, which is limited by [100,000 jobs per project per day](https://cloud.google.com/bigquery/quotas#load_jobs).
 
 This plugin originally loads local files into BigQuery in parallel, that is, consumes a number of jobs, say 24 jobs on a 24-CPU-core machine for example (this depends on embulk parameters such as `min_output_tasks` and `max_threads`).
 
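As a sketch of the GCS route described above, assuming this plugin's `gcs_bucket` and `auto_create_gcs_bucket` options (the bucket name is a placeholder; see `example/config_gcs.yml` in the repository for the authoritative form):

```yaml
out:
  type: bigquery
  mode: append
  gcs_bucket: your_bucket_name
  auto_create_gcs_bucket: true
```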
@@ -398,32 +399,31 @@ To load into a partition, specify `table` parameter with a partition decorator a
 out:
   type: bigquery
   table: table_name$20160929
-  auto_create_table: true
 ```
 
-You may configure `time_partitioning` parameter together
+You may configure the `time_partitioning` parameter together as:
 
 ```yaml
 out:
   type: bigquery
   table: table_name$20160929
-  auto_create_table: true
   time_partitioning:
     type: DAY
     expiration_ms: 259200000
 ```
 
 You can also create a column-based partitioned table as:
+
 ```yaml
 out:
   type: bigquery
   mode: replace
-  auto_create_table: true
   table: table_name
   time_partitioning:
     type: DAY
     field: timestamp
 ```
+
 Note the `time_partitioning.field` should be top-level `DATE` or `TIMESTAMP`.
 
 Use [Tables: patch](https://cloud.google.com/bigquery/docs/reference/v2/tables/patch) API to update the schema of the partitioned table, embulk-output-bigquery itself does not support it, though.
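Combining the pieces above, a hedged sketch of a column-partitioned table that also uses the `clustering` option from the options table (the field names `country` and `user_id` are illustrative):

```yaml
out:
  type: bigquery
  mode: replace
  table: table_name
  time_partitioning:
    type: DAY
    field: timestamp
  clustering:
    fields:
      - country
      - user_id
```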
@@ -448,8 +448,40 @@ $ embulk run -X page_size=1 -b . -l trace example/example.yml
 
 Place your embulk with `.jar` extension:
 
+
+```
+$ curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-latest.jar"
+$ chmod a+x embulk.jar
+```
+
+Investigate JRUBY\_VERSION and Bundler::VERSION included in the embulk.jar:
+
+```
+$ echo JRUBY_VERSION | ./embulk.jar irb
+2019-08-10 00:59:11.866 +0900: Embulk v0.9.17
+Switch to inspect mode.
+JRUBY_VERSION
+"X.X.X.X"
+
+$ echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb
+2019-08-10 01:59:10.460 +0900: Embulk v0.9.17
+Switch to inspect mode.
+require 'bundler'; Bundler::VERSION
+"Y.Y.Y"
+```
+
+Install the same version of jruby (change X.X.X.X to the version shown above) and bundler:
+
+```
+$ rbenv install jruby-X.X.X.X
+$ rbenv local jruby-X.X.X.X
+$ gem install bundler -v Y.Y.Y
+```
+
+Install dependencies (NOTE: use the bundler included in the embulk.jar; otherwise, `gem 'embulk'` is not found):
+
 ```
-$
+$ ./embulk.jar bundle install --path vendor/bundle
 ```
 
 Run tests with `env RUBYOPT="-r ./embulk.jar"`:
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.
+  spec.version = "0.6.2"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -8,11 +8,18 @@ Gem::Specification.new do |spec|
   spec.licenses = ["MIT"]
   spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
 
-
+  # Exclude the example directory, which uses symlinks, from the generated gem.
+  # Symlinks do not work properly on the Windows platform without administrator privilege.
+  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] - Dir["example/*"]
   spec.test_files = spec.files.grep(%r{^(test|spec)/})
   spec.require_paths = ["lib"]
 
-
+  # TODO
+  # signet 0.12.0 and google-api-client 0.33.0 require Ruby >= 2.4.
+  # Embulk 0.9 uses JRuby 9.1.X.Y, which is compatible with Ruby 2.3.
+  # So, force install signet < 0.12 and google-api-client < 0.33.0.
+  spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
+  spec.add_dependency 'google-api-client', '< 0.33.0'
   spec.add_dependency 'time_with_zone'
 
   spec.add_development_dependency 'bundler', ['>= 1.10.6']