embulk-output-bigquery 0.4.12 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +10 -6
- data/CHANGELOG.md +28 -0
- data/Gemfile +2 -0
- data/README.md +109 -75
- data/embulk-output-bigquery.gemspec +4 -2
- data/lib/embulk/output/bigquery.rb +38 -50
- data/lib/embulk/output/bigquery/auth.rb +35 -0
- data/lib/embulk/output/bigquery/bigquery_client.rb +31 -31
- data/lib/embulk/output/bigquery/google_client.rb +3 -34
- data/lib/embulk/output/bigquery/helper.rb +8 -4
- data/test/helper.rb +2 -1
- data/test/test_bigquery_client.rb +17 -21
- data/test/test_configure.rb +19 -19
- data/test/test_example.rb +5 -4
- data/test/test_transaction.rb +36 -76
- metadata +3 -45
- data/example/config_append_direct_schema_update_options.yml +0 -31
- data/example/config_client_options.yml +0 -33
- data/example/config_csv.yml +0 -30
- data/example/config_delete_in_advance.yml +0 -29
- data/example/config_delete_in_advance_partitioned_table.yml +0 -33
- data/example/config_expose_errors.yml +0 -30
- data/example/config_gcs.yml +0 -32
- data/example/config_guess_from_embulk_schema.yml +0 -29
- data/example/config_guess_with_column_options.yml +0 -40
- data/example/config_gzip.yml +0 -1
- data/example/config_jsonl.yml +0 -1
- data/example/config_max_threads.yml +0 -34
- data/example/config_min_ouput_tasks.yml +0 -34
- data/example/config_mode_append.yml +0 -30
- data/example/config_mode_append_direct.yml +0 -30
- data/example/config_nested_record.yml +0 -1
- data/example/config_payload_column.yml +0 -20
- data/example/config_payload_column_index.yml +0 -20
- data/example/config_prevent_duplicate_insert.yml +0 -30
- data/example/config_progress_log_interval.yml +0 -31
- data/example/config_replace.yml +0 -30
- data/example/config_replace_backup.yml +0 -32
- data/example/config_replace_backup_paritioned_table.yml +0 -34
- data/example/config_replace_paritioned_table.yml +0 -33
- data/example/config_replace_schema_update_options.yml +0 -33
- data/example/config_skip_file_generation.yml +0 -32
- data/example/config_table_strftime.yml +0 -30
- data/example/config_template_table.yml +0 -21
- data/example/config_uncompressed.yml +0 -1
- data/example/config_with_rehearsal.yml +0 -33
- data/example/example.csv +0 -17
- data/example/example.jsonl +0 -16
- data/example/example.yml +0 -1
- data/example/example2_1.csv +0 -1
- data/example/example2_2.csv +0 -1
- data/example/example4_1.csv +0 -1
- data/example/example4_2.csv +0 -1
- data/example/example4_3.csv +0 -1
- data/example/example4_4.csv +0 -1
- data/example/json_key.json +0 -12
- data/example/nested_example.jsonl +0 -16
- data/example/schema.json +0 -30
- data/example/schema_expose_errors.json +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ddfd10c5e85614e1dae0333494333653f1af95b8158dfda8977f8b00d64b3478
|
4
|
+
data.tar.gz: 2cec70eaa49c828d7fe9347bc0d9699b9398f21db96880e997a66bdab23deb89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4782a28272da610f8399aca50cc4ddaefea00b8dbf45a37bec24771d7ecdb05bbdcd6de85ff167c5c3745f6689413c215689bb8d420960705cd6cb2026e99932
|
7
|
+
data.tar.gz: 9dbabb787e2f1b5797ccb2a2cd8786ce28d0e0d01310cd522ea4894337a279e809de10abca14b50b836553b6de95df4afd886596d75e7193d4de60a5c6f95781
|
data/.travis.yml
CHANGED
@@ -1,17 +1,21 @@
|
|
1
1
|
language: ruby
|
2
2
|
matrix:
|
3
3
|
include:
|
4
|
-
- env: EMBULK_VERSION=0.8.39
|
5
|
-
rvm: jruby-9.1.5.0 # bundled jruby version
|
6
|
-
jdk: openjdk7 # embulk 0.8.x uses jdk7
|
7
4
|
- env: EMBULK_VERSION=0.9.15
|
8
|
-
rvm: jruby-9.1.
|
5
|
+
rvm: jruby-9.1.15.0 # bundled jruby version
|
9
6
|
jdk: openjdk8 # embulk 0.9.x uses jdk8
|
10
7
|
- env: EMBULK_VERSION=latest
|
11
|
-
rvm: jruby-9.1.
|
8
|
+
rvm: jruby-9.1.15.0 # ?
|
12
9
|
jdk: openjdk8 # ?
|
13
10
|
allow_failures:
|
14
11
|
- env: EMBULK_VERSION=latest
|
15
12
|
before_install:
|
16
13
|
- curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-${EMBULK_VERSION}.jar"
|
17
|
-
|
14
|
+
- chmod a+x embulk.jar
|
15
|
+
- BUNDLER_VERSION=$(echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb | tail -n 2 | tr -d '"')
|
16
|
+
- gem uninstall bundler -x
|
17
|
+
- gem install bundler -v ${BUNDLER_VERSION}
|
18
|
+
install:
|
19
|
+
- ./embulk.jar bundle install --jobs=3 --retry=3 --path vendor/bundle
|
20
|
+
script:
|
21
|
+
- bundle exec env RUBYOPT="-r ./embulk.jar -r embulk -r embulk/java/bootstrap" rake test
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,31 @@
|
|
1
|
+
## 0.6.1 - 2019-08-28
|
2
|
+
|
3
|
+
* [maintenance] Release a new gem not to include symlinks to make it work on Windows.
|
4
|
+
|
5
|
+
## 0.6.0 - 2019-08-11
|
6
|
+
|
7
|
+
Cleanup `auth_method`:
|
8
|
+
|
9
|
+
* [enhancement] Support `auth_method: authorized_user` (OAuth)
|
10
|
+
* [incompatibility change] Rename `auth_method: json_key` to `auth_method: service_account` (`json_key` is kept for backward compatibility)
|
11
|
+
* [incompatibility change] Remove deprecated `auth_method: private_key` (p12 key)
|
12
|
+
* [incompatibility change] Change the default `auth_method` to `application_default` from `private_key` because `private_key` was dropped.
|
13
|
+
|
14
|
+
## 0.5.0 - 2019-08-10
|
15
|
+
|
16
|
+
* [incompatibility change] Drop deprecated `time_partitioning`.`require_partition_filter`
|
17
|
+
* [incompatibility change] Drop `prevent_duplicate_insert` which has no use-case now
|
18
|
+
* [incompatibility change] Modes `replace`, `replace_backup`, `append`, and `delete_in_advance` require `auto_create_table: true` now because, previously, these modes had created a target table even with `auto_create_table: false` and made users being confused. Note that `auto_create_table: true` is always required even for a partition (a table name with a partition decorator) which may not require creating a table. This is for simplicity of logics and implementations.
|
19
|
+
* [incompatibility change] Change default value of `auto_create_table` to `true` because the above 4 modes, that is, except `append_direct` always require `auto_create_table: true` now.
|
20
|
+
|
21
|
+
## 0.4.14 - 2019-08-10
|
22
|
+
|
23
|
+
* [enhancement] Support field partitioning correctly.
|
24
|
+
|
25
|
+
## 0.4.13 - 2019-03-20
|
26
|
+
|
27
|
+
* [enhancement] Support clustered table as an experimental feature
|
28
|
+
|
1
29
|
## 0.4.12 - 2019-03-20
|
2
30
|
|
3
31
|
* [maintenance] Fix `time_partitioning.requirePartitionFilter` was not working. Use `time_partitioning.require_partition_filter` (thanks to @gitetsu)
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -23,34 +23,23 @@ https://developers.google.com/bigquery/loading-data-into-bigquery
|
|
23
23
|
Current version of this plugin supports Google API with Service Account Authentication, but does not support
|
24
24
|
OAuth flow for installed applications.
|
25
25
|
|
26
|
-
### INCOMPATIBILITY CHANGES
|
27
|
-
|
28
|
-
v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGELOG.md) for details.
|
29
|
-
|
30
|
-
* `formatter` option (formatter plugin support) is dropped. Use `source_format` option instead. (it already exists in v0.2.x too)
|
31
|
-
* `encoders` option (encoder plugin support) is dropped. Use `compression` option instead (it already exists in v0.2.x too).
|
32
|
-
* `mode: append` mode now expresses a transactional append, and `mode: append_direct` is one which is not transactional.
|
33
|
-
|
34
26
|
## Configuration
|
35
27
|
|
36
28
|
#### Original options
|
37
29
|
|
38
30
|
| name | type | required? | default | description |
|
39
31
|
|:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
|
40
|
-
| mode | string | optional | "append" | See [Mode](#mode)
|
41
|
-
| auth_method | string | optional | "
|
42
|
-
|
|
43
|
-
|
|
44
|
-
| json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
|
45
|
-
| project | string | required if json_keyfile is not given | | project_id |
|
32
|
+
| mode | string | optional | "append" | See [Mode](#mode) |
|
33
|
+
| auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
|
34
|
+
| json_keyfile | string | optional | | keyfile path or `content` |
|
35
|
+
| project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
|
46
36
|
| dataset | string | required | | dataset |
|
47
37
|
| location | string | optional | nil | geographic location of dataset. See [Location](#location) |
|
48
38
|
| table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
|
49
39
|
| auto_create_dataset | boolean | optional | false | automatically create dataset |
|
50
|
-
| auto_create_table | boolean | optional |
|
40
|
+
| auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes require `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
|
51
41
|
| schema_file | string | optional | | /path/to/schema.json |
|
52
42
|
| template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
|
53
|
-
| prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
|
54
43
|
| job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
|
55
44
|
| job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
|
56
45
|
| is_skip_job_result_check | boolean | optional | false | Skip waiting Load job finishes. Available for append, or delete_in_advance mode |
|
@@ -107,7 +96,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
|
|
107
96
|
| time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
|
108
97
|
| time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
|
109
98
|
| time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
|
110
|
-
|
|
99
|
+
| clustering | hash | optional | nil | Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
|
100
|
+
| clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
|
111
101
|
| schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
|
112
102
|
|
113
103
|
### Example
|
@@ -116,9 +106,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
|
|
116
106
|
out:
|
117
107
|
type: bigquery
|
118
108
|
mode: append
|
119
|
-
auth_method:
|
120
|
-
|
121
|
-
p12_keyfile: /path/to/p12_keyfile.p12
|
109
|
+
auth_method: service_account
|
110
|
+
json_keyfile: /path/to/json_keyfile.json
|
122
111
|
project: your-project-000
|
123
112
|
dataset: your_dataset_name
|
124
113
|
table: your_table_name
|
@@ -126,7 +115,7 @@ out:
|
|
126
115
|
source_format: NEWLINE_DELIMITED_JSON
|
127
116
|
```
|
128
117
|
|
129
|
-
###
|
118
|
+
### Location
|
130
119
|
|
131
120
|
The geographic location of the dataset. Required except for US and EU.
|
132
121
|
|
@@ -134,7 +123,7 @@ GCS bucket should be in same region when you use `gcs_bucket`.
|
|
134
123
|
|
135
124
|
See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
|
136
125
|
|
137
|
-
###
|
126
|
+
### Mode
|
138
127
|
|
139
128
|
5 modes are provided.
|
140
129
|
|
@@ -156,6 +145,8 @@ This is not transactional, i.e., if fails, the target table could have some rows
|
|
156
145
|
|
157
146
|
```is_skip_job_result_check``` must be false when replace mode
|
158
147
|
|
148
|
+
NOTE: BigQuery does not support replacing (actually, copying into) a non-partitioned table with a partitioned table atomically. You must once delete the non-partitioned table, otherwise, you get `Incompatible table partitioning specification when copying to the column partitioned table` error.
|
149
|
+
|
159
150
|
##### replace_backup
|
160
151
|
|
161
152
|
1. Load to temporary table (Create and WRITE_APPEND in parallel)
|
@@ -171,53 +162,69 @@ This is not transactional, i.e., if fails, the target table could have some rows
|
|
171
162
|
|
172
163
|
### Authentication
|
173
164
|
|
174
|
-
There are
|
165
|
+
There are four authentication methods
|
175
166
|
|
176
|
-
1.
|
177
|
-
|
178
|
-
|
167
|
+
1. `service_account` (or `json_key` for backward compatibility)
|
168
|
+
1. `authorized_user`
|
169
|
+
1. `compute_engine`
|
170
|
+
1. `application_default`
|
179
171
|
|
180
|
-
####
|
172
|
+
#### service\_account (or json\_key)
|
181
173
|
|
182
|
-
|
183
|
-
download its
|
174
|
+
Use GCP service account credentials.
|
175
|
+
You first need to create a service account, download its json key and deploy the key with embulk.
|
184
176
|
|
185
177
|
```yaml
|
186
178
|
out:
|
187
179
|
type: bigquery
|
188
|
-
auth_method:
|
189
|
-
|
190
|
-
p12_keyfile: /path/to/p12_keyfile.p12
|
180
|
+
auth_method: service_account
|
181
|
+
json_keyfile: /path/to/json_keyfile.json
|
191
182
|
```
|
192
183
|
|
193
|
-
|
184
|
+
You can also embed contents of `json_keyfile` at config.yml.
|
194
185
|
|
195
|
-
|
196
|
-
|
186
|
+
```yaml
|
187
|
+
out:
|
188
|
+
type: bigquery
|
189
|
+
auth_method: service_account
|
190
|
+
json_keyfile:
|
191
|
+
content: |
|
192
|
+
{
|
193
|
+
"private_key_id": "123456789",
|
194
|
+
"private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
|
195
|
+
"client_email": "..."
|
196
|
+
}
|
197
|
+
```
|
198
|
+
|
199
|
+
#### authorized\_user
|
200
|
+
|
201
|
+
Use Google user credentials.
|
202
|
+
You can get your credentials at `~/.config/gcloud/application_default_credentials.json` by running `gcloud auth login`.
|
197
203
|
|
198
204
|
```yaml
|
199
205
|
out:
|
200
206
|
type: bigquery
|
201
|
-
auth_method:
|
202
|
-
json_keyfile: /path/to/
|
207
|
+
auth_method: authorized_user
|
208
|
+
json_keyfile: /path/to/credentials.json
|
203
209
|
```
|
204
210
|
|
205
|
-
You can also embed contents of json_keyfile at config.yml.
|
211
|
+
You can also embed contents of `json_keyfile` at config.yml.
|
206
212
|
|
207
213
|
```yaml
|
208
214
|
out:
|
209
215
|
type: bigquery
|
210
|
-
auth_method:
|
216
|
+
auth_method: authorized_user
|
211
217
|
json_keyfile:
|
212
218
|
content: |
|
213
219
|
{
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
220
|
+
"client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
|
221
|
+
"client_secret":"xxxxxxxxxxx",
|
222
|
+
"refresh_token":"xxxxxxxxxxx",
|
223
|
+
"type":"authorized_user"
|
224
|
+
}
|
218
225
|
```
|
219
226
|
|
220
|
-
####
|
227
|
+
#### compute\_engine
|
221
228
|
|
222
229
|
On the other hand, you don't need to explicitly create a service account for embulk when you
|
223
230
|
run embulk in Google Compute Engine. In this third authentication method, you need to
|
@@ -230,6 +237,22 @@ out:
|
|
230
237
|
auth_method: compute_engine
|
231
238
|
```
|
232
239
|
|
240
|
+
#### application\_default
|
241
|
+
|
242
|
+
Use Application Default Credentials (ADC). ADC is a strategy to locate Google Cloud Service Account credentials.
|
243
|
+
|
244
|
+
1. ADC checks to see if the environment variable `GOOGLE_APPLICATION_CREDENTIALS` is set. If the variable is set, ADC uses the service account file that the variable points to.
|
245
|
+
2. ADC checks to see if `~/.config/gcloud/application_default_credentials.json` is located. This file is created by running `gcloud auth application-default login`.
|
246
|
+
3. Use the default service account for credentials if the application running on Compute Engine, App Engine, Kubernetes Engine, Cloud Functions or Cloud Run.
|
247
|
+
|
248
|
+
See https://cloud.google.com/docs/authentication/production for details.
|
249
|
+
|
250
|
+
```yaml
|
251
|
+
out:
|
252
|
+
type: bigquery
|
253
|
+
auth_method: application_default
|
254
|
+
```
|
255
|
+
|
233
256
|
### Table id formatting
|
234
257
|
|
235
258
|
`table` and option accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
|
@@ -238,20 +261,16 @@ Table ids are formatted at runtime
|
|
238
261
|
using the local time of the embulk server.
|
239
262
|
|
240
263
|
For example, with the configuration below,
|
241
|
-
data is inserted into tables `
|
264
|
+
data is inserted into tables `table_20150503`, `table_20150504` and so on.
|
242
265
|
|
243
266
|
```yaml
|
244
267
|
out:
|
245
268
|
type: bigquery
|
246
|
-
table: table_%
|
269
|
+
table: table_%Y%m%d
|
247
270
|
```
|
248
271
|
|
249
272
|
### Dynamic table creating
|
250
273
|
|
251
|
-
When `auto_create_table` is set to true, try to create the table using BigQuery API.
|
252
|
-
|
253
|
-
If table already exists, insert into it.
|
254
|
-
|
255
274
|
There are 3 ways to set schema.
|
256
275
|
|
257
276
|
#### Set schema.json
|
@@ -262,7 +281,7 @@ Please set file path of schema.json.
|
|
262
281
|
out:
|
263
282
|
type: bigquery
|
264
283
|
auto_create_table: true
|
265
|
-
table: table_%
|
284
|
+
table: table_%Y%m%d
|
266
285
|
schema_file: /path/to/schema.json
|
267
286
|
```
|
268
287
|
|
@@ -274,7 +293,7 @@ Plugin will try to read schema from existing table and use it as schema template
|
|
274
293
|
out:
|
275
294
|
type: bigquery
|
276
295
|
auto_create_table: true
|
277
|
-
table: table_%
|
296
|
+
table: table_%Y%m%d
|
278
297
|
template_table: existing_table_name
|
279
298
|
```
|
280
299
|
|
@@ -350,25 +369,9 @@ out:
|
|
350
369
|
payload_column_index: 0 # or, payload_column: payload
|
351
370
|
```
|
352
371
|
|
353
|
-
### Prevent Duplication
|
354
|
-
|
355
|
-
`prevent_duplicate_insert` option is used to prevent inserting same data for modes `append` or `append_direct`.
|
356
|
-
|
357
|
-
When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generate job ID from md5 hash of file and other options.
|
358
|
-
|
359
|
-
`job ID = md5(md5(file) + dataset + table + schema + source_format + file_delimiter + max_bad_records + encoding + ignore_unknown_values + allow_quoted_newlines)`
|
360
|
-
|
361
|
-
[job ID must be unique(including failures)](https://cloud.google.com/bigquery/loading-data-into-bigquery#consistency) so that same data can't be inserted with same settings repeatedly.
|
362
|
-
|
363
|
-
```yaml
|
364
|
-
out:
|
365
|
-
type: bigquery
|
366
|
-
prevent_duplicate_insert: true
|
367
|
-
```
|
368
|
-
|
369
372
|
### GCS Bucket
|
370
373
|
|
371
|
-
This is useful to reduce number of consumed jobs, which is limited by [
|
374
|
+
This is useful to reduce number of consumed jobs, which is limited by [100,000 jobs per project per day](https://cloud.google.com/bigquery/quotas#load_jobs).
|
372
375
|
|
373
376
|
This plugin originally loads local files into BigQuery in parallel, that is, consumes a number of jobs, say 24 jobs on 24 CPU core machine for example (this depends on embulk parameters such as `min_output_tasks` and `max_threads`).
|
374
377
|
|
@@ -396,32 +399,31 @@ To load into a partition, specify `table` parameter with a partition decorator a
|
|
396
399
|
out:
|
397
400
|
type: bigquery
|
398
401
|
table: table_name$20160929
|
399
|
-
auto_create_table: true
|
400
402
|
```
|
401
403
|
|
402
|
-
You may configure `time_partitioning` parameter together
|
404
|
+
You may configure `time_partitioning` parameter together as:
|
403
405
|
|
404
406
|
```yaml
|
405
407
|
out:
|
406
408
|
type: bigquery
|
407
409
|
table: table_name$20160929
|
408
|
-
auto_create_table: true
|
409
410
|
time_partitioning:
|
410
411
|
type: DAY
|
411
412
|
expiration_ms: 259200000
|
412
413
|
```
|
413
414
|
|
414
415
|
You can also create column-based partitioning table as:
|
416
|
+
|
415
417
|
```yaml
|
416
418
|
out:
|
417
419
|
type: bigquery
|
418
420
|
mode: replace
|
419
|
-
auto_create_table: true
|
420
421
|
table: table_name
|
421
422
|
time_partitioning:
|
422
423
|
type: DAY
|
423
424
|
field: timestamp
|
424
425
|
```
|
426
|
+
|
425
427
|
Note the `time_partitioning.field` should be top-level `DATE` or `TIMESTAMP`.
|
426
428
|
|
427
429
|
Use [Tables: patch](https://cloud.google.com/bigquery/docs/reference/v2/tables/patch) API to update the schema of the partitioned table, embulk-output-bigquery itself does not support it, though.
|
@@ -446,8 +448,40 @@ $ embulk run -X page_size=1 -b . -l trace example/example.yml
|
|
446
448
|
|
447
449
|
Place your embulk with `.jar` extension:
|
448
450
|
|
451
|
+
|
452
|
+
```
|
453
|
+
$ curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-latest.jar"
|
454
|
+
$ chmod a+x embulk.jar
|
455
|
+
```
|
456
|
+
|
457
|
+
Investigate JRUBY\_VERSION and Bundler::VERSION included in the embulk.jar:
|
458
|
+
|
459
|
+
```
|
460
|
+
$ echo JRUBY_VERSION | ./embulk.jar irb
|
461
|
+
2019-08-10 00:59:11.866 +0900: Embulk v0.9.17
|
462
|
+
Switch to inspect mode.
|
463
|
+
JRUBY_VERSION
|
464
|
+
"X.X.X.X"
|
465
|
+
|
466
|
+
$ echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb
|
467
|
+
2019-08-10 01:59:10.460 +0900: Embulk v0.9.17
|
468
|
+
Switch to inspect mode.
|
469
|
+
require 'bundler'; Bundler::VERSION
|
470
|
+
"Y.Y.Y"
|
471
|
+
```
|
472
|
+
|
473
|
+
Install the same version of jruby (change X.X.X.X to the version shown above) and bundler:
|
474
|
+
|
475
|
+
```
|
476
|
+
$ rbenv install jruby-X.X.X.X
|
477
|
+
$ rbenv local jruby-X.X.X.X
|
478
|
+
$ gem install bundler -v Y.Y.Y
|
479
|
+
```
|
480
|
+
|
481
|
+
Install dependencies (NOTE: Use bundler included in the embulk.jar, otherwise, `gem 'embulk'` is not found):
|
482
|
+
|
449
483
|
```
|
450
|
-
$
|
484
|
+
$ ./embulk.jar bundle install --path vendor/bundle
|
451
485
|
```
|
452
486
|
|
453
487
|
Run tests with `env RUBYOPT="-r ./embulk.jar"`:
|
@@ -466,7 +500,7 @@ $ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_example.rb
|
|
466
500
|
|
467
501
|
### Release gem:
|
468
502
|
|
469
|
-
|
503
|
+
Change the version of gemspec, and write CHANGELOG.md. Then,
|
470
504
|
|
471
505
|
```
|
472
506
|
$ bundle exec rake release
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-bigquery"
|
3
|
-
spec.version = "0.
|
3
|
+
spec.version = "0.6.1"
|
4
4
|
spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
|
5
5
|
spec.summary = "Google BigQuery output plugin for Embulk"
|
6
6
|
spec.description = "Embulk plugin that insert records to Google BigQuery."
|
@@ -8,7 +8,9 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.licenses = ["MIT"]
|
9
9
|
spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
|
10
10
|
|
11
|
-
|
11
|
+
# Exclude example directory which uses symlinks from generating gem.
|
12
|
+
# Symlinks do not work properly on the Windows platform without administrator privilege.
|
13
|
+
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] - Dir["example/*" ]
|
12
14
|
spec.test_files = spec.files.grep(%r{^(test|spec)/})
|
13
15
|
spec.require_paths = ["lib"]
|
14
16
|
|