embulk-output-bigquery 0.4.12 → 0.6.1

Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +10 -6
  3. data/CHANGELOG.md +28 -0
  4. data/Gemfile +2 -0
  5. data/README.md +109 -75
  6. data/embulk-output-bigquery.gemspec +4 -2
  7. data/lib/embulk/output/bigquery.rb +38 -50
  8. data/lib/embulk/output/bigquery/auth.rb +35 -0
  9. data/lib/embulk/output/bigquery/bigquery_client.rb +31 -31
  10. data/lib/embulk/output/bigquery/google_client.rb +3 -34
  11. data/lib/embulk/output/bigquery/helper.rb +8 -4
  12. data/test/helper.rb +2 -1
  13. data/test/test_bigquery_client.rb +17 -21
  14. data/test/test_configure.rb +19 -19
  15. data/test/test_example.rb +5 -4
  16. data/test/test_transaction.rb +36 -76
  17. metadata +3 -45
  18. data/example/config_append_direct_schema_update_options.yml +0 -31
  19. data/example/config_client_options.yml +0 -33
  20. data/example/config_csv.yml +0 -30
  21. data/example/config_delete_in_advance.yml +0 -29
  22. data/example/config_delete_in_advance_partitioned_table.yml +0 -33
  23. data/example/config_expose_errors.yml +0 -30
  24. data/example/config_gcs.yml +0 -32
  25. data/example/config_guess_from_embulk_schema.yml +0 -29
  26. data/example/config_guess_with_column_options.yml +0 -40
  27. data/example/config_gzip.yml +0 -1
  28. data/example/config_jsonl.yml +0 -1
  29. data/example/config_max_threads.yml +0 -34
  30. data/example/config_min_ouput_tasks.yml +0 -34
  31. data/example/config_mode_append.yml +0 -30
  32. data/example/config_mode_append_direct.yml +0 -30
  33. data/example/config_nested_record.yml +0 -1
  34. data/example/config_payload_column.yml +0 -20
  35. data/example/config_payload_column_index.yml +0 -20
  36. data/example/config_prevent_duplicate_insert.yml +0 -30
  37. data/example/config_progress_log_interval.yml +0 -31
  38. data/example/config_replace.yml +0 -30
  39. data/example/config_replace_backup.yml +0 -32
  40. data/example/config_replace_backup_paritioned_table.yml +0 -34
  41. data/example/config_replace_paritioned_table.yml +0 -33
  42. data/example/config_replace_schema_update_options.yml +0 -33
  43. data/example/config_skip_file_generation.yml +0 -32
  44. data/example/config_table_strftime.yml +0 -30
  45. data/example/config_template_table.yml +0 -21
  46. data/example/config_uncompressed.yml +0 -1
  47. data/example/config_with_rehearsal.yml +0 -33
  48. data/example/example.csv +0 -17
  49. data/example/example.jsonl +0 -16
  50. data/example/example.yml +0 -1
  51. data/example/example2_1.csv +0 -1
  52. data/example/example2_2.csv +0 -1
  53. data/example/example4_1.csv +0 -1
  54. data/example/example4_2.csv +0 -1
  55. data/example/example4_3.csv +0 -1
  56. data/example/example4_4.csv +0 -1
  57. data/example/json_key.json +0 -12
  58. data/example/nested_example.jsonl +0 -16
  59. data/example/schema.json +0 -30
  60. data/example/schema_expose_errors.json +0 -30
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 9fceabdc34780426ea3ceda76c1afe00a4c99115207bb8feaf0151ce9ae48911
-  data.tar.gz: 708649547b8a6693a12722e376c5c923a6e3f3a17ef180ebed56ecc92d6270b3
+  metadata.gz: ddfd10c5e85614e1dae0333494333653f1af95b8158dfda8977f8b00d64b3478
+  data.tar.gz: 2cec70eaa49c828d7fe9347bc0d9699b9398f21db96880e997a66bdab23deb89
 SHA512:
-  metadata.gz: 8d31eb9867c7c70b9eb1b01bfb2889afae9a3693b328391d62ddc74cc552452832d50edb2af8d69285b9468cc99292c72de132f86bf48d2cb1920ab6f2be5fcf
-  data.tar.gz: 4bcab8f4bf48962985d9904c64530dd45b71dcd2afbc42388f3640636f0c1a971f0fcebda0db8bde2599ab54355210822bcf5de8d2196d42eceb9bca3d6145ae
+  metadata.gz: 4782a28272da610f8399aca50cc4ddaefea00b8dbf45a37bec24771d7ecdb05bbdcd6de85ff167c5c3745f6689413c215689bb8d420960705cd6cb2026e99932
+  data.tar.gz: 9dbabb787e2f1b5797ccb2a2cd8786ce28d0e0d01310cd522ea4894337a279e809de10abca14b50b836553b6de95df4afd886596d75e7193d4de60a5c6f95781
data/.travis.yml CHANGED
@@ -1,17 +1,21 @@
 language: ruby
 matrix:
   include:
-  - env: EMBULK_VERSION=0.8.39
-    rvm: jruby-9.1.5.0 # bundled jruby version
-    jdk: openjdk7 # embulk 0.8.x uses jdk7
   - env: EMBULK_VERSION=0.9.15
-    rvm: jruby-9.1.5.0 # bundled jruby version
+    rvm: jruby-9.1.15.0 # bundled jruby version
     jdk: openjdk8 # embulk 0.9.x uses jdk8
   - env: EMBULK_VERSION=latest
-    rvm: jruby-9.1.5.0 # ?
+    rvm: jruby-9.1.15.0 # ?
     jdk: openjdk8 # ?
   allow_failures:
   - env: EMBULK_VERSION=latest
 before_install:
 - curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-${EMBULK_VERSION}.jar"
-script: bundle exec env RUBYOPT="-r ./embulk.jar" rake test
+- chmod a+x embulk.jar
+- BUNDLER_VERSION=$(echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb | tail -n 2 | tr -d '"')
+- gem uninstall bundler -x
+- gem install bundler -v ${BUNDLER_VERSION}
+install:
+- ./embulk.jar bundle install --jobs=3 --retry=3 --path vendor/bundle
+script:
+- bundle exec env RUBYOPT="-r ./embulk.jar -r embulk -r embulk/java/bootstrap" rake test
data/CHANGELOG.md CHANGED
@@ -1,3 +1,31 @@
+## 0.6.1 - 2019-08-28
+
+* [maintenance] Release a new gem that does not include symlinks, so that it works on Windows.
+
+## 0.6.0 - 2019-08-11
+
+Cleanup `auth_method`:
+
+* [enhancement] Support `auth_method: authorized_user` (OAuth)
+* [incompatibility change] Rename `auth_method: json_key` to `auth_method: service_account` (`json_key` is kept for backward compatibility)
+* [incompatibility change] Remove deprecated `auth_method: private_key` (p12 key)
+* [incompatibility change] Change the default `auth_method` to `application_default` from `private_key` because `private_key` was dropped.
+
+## 0.5.0 - 2019-08-10
+
+* [incompatibility change] Drop deprecated `time_partitioning`.`require_partition_filter`
+* [incompatibility change] Drop `prevent_duplicate_insert`, which has no use-case now
+* [incompatibility change] Modes `replace`, `replace_backup`, `append`, and `delete_in_advance` now require `auto_create_table: true` because, previously, these modes created a target table even with `auto_create_table: false`, which confused users. Note that `auto_create_table: true` is always required, even for a partition (a table name with a partition decorator), which may not require creating a table. This keeps the logic and implementation simple.
+* [incompatibility change] Change the default value of `auto_create_table` to `true` because the above 4 modes, that is, all modes except `append_direct`, now always require `auto_create_table: true`.
+
+## 0.4.14 - 2019-08-10
+
+* [enhancement] Support field partitioning correctly.
+
+## 0.4.13 - 2019-03-20
+
+* [enhancement] Support clustered tables as an experimental feature
+
 ## 0.4.12 - 2019-03-20
 
 * [maintenance] Fix `time_partitioning.requirePartitionFilter` was not working. Use `time_partitioning.require_partition_filter` (thanks to @gitetsu)
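
In config.yml terms, the 0.6.0 rename amounts to the following (a minimal sketch; the keyfile path is a placeholder, and `json_key` keeps working for backward compatibility):

```yaml
# Before (<= 0.5.x):
out:
  type: bigquery
  auth_method: json_key
  json_keyfile: /path/to/json_keyfile.json

# After (>= 0.6.0) — only the auth_method value changes:
out:
  type: bigquery
  auth_method: service_account
  json_keyfile: /path/to/json_keyfile.json
```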
data/Gemfile CHANGED
@@ -1,6 +1,8 @@
 source 'https://rubygems.org/'
 
 gemspec
+gem 'embulk'
+gem 'liquid', '= 4.0.0' # the version included in embulk.jar
 gem 'embulk-parser-none'
 gem 'embulk-parser-jsonl'
 gem 'pry-nav'
data/README.md CHANGED
@@ -23,34 +23,23 @@ https://developers.google.com/bigquery/loading-data-into-bigquery
 Current version of this plugin supports Google API with Service Account Authentication, but does not support
 OAuth flow for installed applications.
 
-### INCOMPATIBILITY CHANGES
-
-v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGELOG.md) for details.
-
-* `formatter` option (formatter plugin support) is dropped. Use the `source_format` option instead (it already exists in v0.2.x too).
-* `encoders` option (encoder plugin support) is dropped. Use the `compression` option instead (it already exists in v0.2.x too).
-* `mode: append` now expresses a transactional append, and `mode: append_direct` is the one which is not transactional.
-
 ## Configuration
 
 #### Original options
 
 | name | type | required? | default | description |
 |:------------------------------|:--------|:----------|:------------------------|:-----------------------|
-| mode | string | optional | "append" | See [Mode](#mode) |
-| auth_method | string | optional | "private_key" | `private_key`, `json_key` or `compute_engine` |
-| service_account_email | string | required when auth_method is private_key | | Your Google service account email |
-| p12_keyfile | string | required when auth_method is private_key | | Full path of the private key in P12 (PKCS12) format |
-| json_keyfile | string | required when auth_method is json_key | | Full path of the json key |
-| project | string | required if json_keyfile is not given | | project\_id |
+| mode | string | optional | "append" | See [Mode](#mode) |
+| auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
+| json_keyfile | string | optional | | keyfile path or `content` |
+| project | string | required unless service\_account's `json_keyfile` is given | | project\_id |
 | dataset | string | required | | dataset |
 | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929` |
 | auto_create_dataset | boolean | optional | false | automatically create dataset |
-| auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
+| auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes require `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
 | schema_file | string | optional | | /path/to/schema.json |
 | template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
-| prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
 | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
 | job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
 | is_skip_job_result_check | boolean | optional | false | Skip waiting for the load job to finish. Available for append, or delete_in_advance mode |
@@ -107,7 +96,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 | time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
 | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
 | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
-| time_partitioning.require_partition_filter | boolean | optional | nil | If true, a valid partition filter is required when querying |
+| clustering | hash | optional | nil | Currently, clustering is supported only for partitioned tables, so it must be used with the `time_partitioning` option. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
+| clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
 | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION`, `ALLOW_FIELD_RELAXATION`, or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` jobs, that is, it is not effective for most modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes the origin table, so it does not need to update the schema. Only `append_direct` can utilize schema updates. |
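
As a sketch of how the new `clustering` option combines with `time_partitioning` (the column names here are placeholders, not from this diff):

```yaml
out:
  type: bigquery
  mode: replace
  table: table_name
  time_partitioning:   # clustering currently requires a partitioned table
    type: DAY
    field: timestamp
  clustering:
    fields:            # sort order follows the order listed here
      - country
      - user_id
```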
 
 ### Example
@@ -116,9 +106,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 out:
   type: bigquery
   mode: append
-  auth_method: private_key # default
-  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
-  p12_keyfile: /path/to/p12_keyfile.p12
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
   project: your-project-000
   dataset: your_dataset_name
   table: your_table_name
@@ -126,7 +115,7 @@ out:
   source_format: NEWLINE_DELIMITED_JSON
 ```
 
-### location
+### Location
 
 The geographic location of the dataset. Required except for US and EU.
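
For a dataset outside the US and EU multi-regions, that means setting `location` explicitly. A minimal sketch (the region value is a placeholder, not from this diff):

```yaml
out:
  type: bigquery
  dataset: your_dataset_name
  location: asia-northeast1 # placeholder region; may be omitted for US and EU
```
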
@@ -134,7 +123,7 @@ GCS bucket should be in same region when you use `gcs_bucket`.
 
 See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
 
-### mode
+### Mode
 
 5 modes are provided.
 
@@ -156,6 +145,8 @@ This is not transactional, i.e., if it fails, the target table could have some rows
 
 ```is_skip_job_result_check``` must be false when replace mode
 
+NOTE: BigQuery does not support atomically replacing (actually, copying into) a non-partitioned table with a partitioned table. You must delete the non-partitioned table first; otherwise, you get an `Incompatible table partitioning specification when copying to the column partitioned table` error.
+
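Putting these constraints together, a replace-mode configuration might look like the following minimal sketch (dataset and table names are placeholders; `auto_create_table: true` is the default since 0.5.0):

```yaml
out:
  type: bigquery
  mode: replace
  dataset: your_dataset_name
  table: your_table_name
  auto_create_table: true         # required by replace mode (default since 0.5.0)
  is_skip_job_result_check: false # must stay false in replace mode
```
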
 ##### replace_backup
 
 1. Load to temporary table (Create and WRITE_APPEND in parallel)
@@ -171,53 +162,69 @@ This is not transactional, i.e., if it fails, the target table could have some rows
 
 ### Authentication
 
-There are three methods supported to fetch an access token for the service account.
+There are four authentication methods:
 
-1. Public-Private key pair of GCP (Google Cloud Platform)'s service account
-2. JSON key of GCP (Google Cloud Platform)'s service account
-3. Pre-defined access token (Google Compute Engine only)
+1. `service_account` (or `json_key` for backward compatibility)
+1. `authorized_user`
+1. `compute_engine`
+1. `application_default`
 
-#### Public-Private key pair of GCP's service account
+#### service\_account (or json\_key)
 
-You first need to create a service account (client ID),
-download its private key and deploy the key with embulk.
+Use GCP service account credentials.
+You first need to create a service account, download its json key and deploy the key with embulk.
 
 ```yaml
 out:
   type: bigquery
-  auth_method: private_key # default
-  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
-  p12_keyfile: /path/to/p12_keyfile.p12
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
 ```
 
-#### JSON key of GCP's service account
+You can also embed the contents of `json_keyfile` in config.yml:
 
-You first need to create a service account (client ID),
-download its json key and deploy the key with embulk.
+```yaml
+out:
+  type: bigquery
+  auth_method: service_account
+  json_keyfile:
+    content: |
+      {
+          "private_key_id": "123456789",
+          "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
+          "client_email": "..."
+      }
+```
+
+#### authorized\_user
+
+Use Google user credentials.
+You can get your credentials at `~/.config/gcloud/application_default_credentials.json` by running `gcloud auth login`.
 
 ```yaml
 out:
   type: bigquery
-  auth_method: json_key
-  json_keyfile: /path/to/json_keyfile.json
+  auth_method: authorized_user
+  json_keyfile: /path/to/credentials.json
 ```
 
-You can also embed contents of json_keyfile at config.yml.
+You can also embed the contents of `json_keyfile` in config.yml:
 
 ```yaml
 out:
   type: bigquery
-  auth_method: json_key
+  auth_method: authorized_user
   json_keyfile:
     content: |
       {
-          "private_key_id": "123456789",
-          "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
-          "client_email": "..."
-      }
+          "client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
+          "client_secret":"xxxxxxxxxxx",
+          "refresh_token":"xxxxxxxxxxx",
+          "type":"authorized_user"
+      }
 ```
 
-#### Pre-defined access token (GCE only)
+#### compute\_engine
 
 On the other hand, you don't need to explicitly create a service account for embulk when you
 run embulk in Google Compute Engine. In this third authentication method, you need to
@@ -230,6 +237,22 @@ out:
   auth_method: compute_engine
 ```
 
+#### application\_default
+
+Use Application Default Credentials (ADC). ADC is a strategy to locate Google Cloud Service Account credentials.
+
+1. ADC checks whether the environment variable `GOOGLE_APPLICATION_CREDENTIALS` is set. If so, ADC uses the service account file that the variable points to.
+2. ADC checks whether `~/.config/gcloud/application_default_credentials.json` exists. This file is created by running `gcloud auth application-default login`.
+3. Use the default service account for credentials if the application is running on Compute Engine, App Engine, Kubernetes Engine, Cloud Functions or Cloud Run.
+
+See https://cloud.google.com/docs/authentication/production for details.
+
+```yaml
+out:
+  type: bigquery
+  auth_method: application_default
+```
+
 ### Table id formatting
 
 The `table` option accepts [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
@@ -238,20 +261,16 @@ Table ids are formatted at runtime
 using the local time of the embulk server.
 
 For example, with the configuration below,
-data is inserted into tables `table_2015_04`, `table_2015_05` and so on.
+data is inserted into tables `table_20150503`, `table_20150504` and so on.
 
 ```yaml
 out:
   type: bigquery
-  table: table_%Y_%m
+  table: table_%Y%m%d
 ```
 
 ### Dynamic table creating
 
-When `auto_create_table` is set to true, try to create the table using BigQuery API.
-
-If the table already exists, insert into it.
-
 There are 3 ways to set schema.
 
 #### Set schema.json
@@ -262,7 +281,7 @@ Please set file path of schema.json.
 out:
   type: bigquery
   auto_create_table: true
-  table: table_%Y_%m
+  table: table_%Y%m%d
   schema_file: /path/to/schema.json
 ```
 
@@ -274,7 +293,7 @@ Plugin will try to read schema from existing table and use it as schema template.
 out:
   type: bigquery
   auto_create_table: true
-  table: table_%Y_%m
+  table: table_%Y%m%d
   template_table: existing_table_name
 ```
 
@@ -350,25 +369,9 @@ out:
   payload_column_index: 0 # or, payload_column: payload
 ```
 
-### Prevent Duplication
-
-The `prevent_duplicate_insert` option is used to prevent inserting the same data for modes `append` or `append_direct`.
-
-When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generates a job ID from the md5 hash of the file and other options.
-
-`job ID = md5(md5(file) + dataset + table + schema + source_format + file_delimiter + max_bad_records + encoding + ignore_unknown_values + allow_quoted_newlines)`
-
-[Job IDs must be unique (including failures)](https://cloud.google.com/bigquery/loading-data-into-bigquery#consistency), so the same data can't be inserted repeatedly with the same settings.
-
-```yaml
-out:
-  type: bigquery
-  prevent_duplicate_insert: true
-```
-
 ### GCS Bucket
 
-This is useful to reduce the number of consumed jobs, which is limited by [50,000 jobs per project per day](https://cloud.google.com/bigquery/quota-policy#import).
+This is useful to reduce the number of consumed jobs, which is limited by [100,000 jobs per project per day](https://cloud.google.com/bigquery/quotas#load_jobs).
 
 This plugin originally loads local files into BigQuery in parallel, that is, it consumes a number of jobs, say 24 jobs on a 24-CPU-core machine for example (this depends on embulk parameters such as `min_output_tasks` and `max_threads`).
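
A minimal sketch of routing loads through GCS instead. The `gcs_bucket` option appears in the section heading above; `auto_create_gcs_bucket` is assumed from the full README and the bucket name is a placeholder:

```yaml
out:
  type: bigquery
  gcs_bucket: your_bucket_name # bucket should be in the same region as the dataset
  auto_create_gcs_bucket: true # assumed option: create the bucket if it does not exist
```
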
@@ -396,32 +399,31 @@ To load into a partition, specify the `table` parameter with a partition decorator as:
 out:
   type: bigquery
   table: table_name$20160929
-  auto_create_table: true
 ```
 
-You may configure the `time_partitioning` parameter together to create the table via the `auto_create_table: true` option as:
+You may configure the `time_partitioning` parameter together as:
 
 ```yaml
 out:
   type: bigquery
   table: table_name$20160929
-  auto_create_table: true
   time_partitioning:
     type: DAY
     expiration_ms: 259200000
 ```
 
 You can also create a column-based partitioned table as:
+
 ```yaml
 out:
   type: bigquery
   mode: replace
-  auto_create_table: true
   table: table_name
   time_partitioning:
     type: DAY
     field: timestamp
 ```
+
 Note that `time_partitioning.field` should be a top-level `DATE` or `TIMESTAMP` column.
 
 Use the [Tables: patch](https://cloud.google.com/bigquery/docs/reference/v2/tables/patch) API to update the schema of the partitioned table; embulk-output-bigquery itself does not support it, though.
@@ -446,8 +448,40 @@ $ embulk run -X page_size=1 -b . -l trace example/example.yml
 
 Place your embulk with `.jar` extension:
 
+```
+$ curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-latest.jar"
+$ chmod a+x embulk.jar
+```
+
+Investigate the JRUBY\_VERSION and Bundler::VERSION included in the embulk.jar:
+
+```
+$ echo JRUBY_VERSION | ./embulk.jar irb
+2019-08-10 00:59:11.866 +0900: Embulk v0.9.17
+Switch to inspect mode.
+JRUBY_VERSION
+"X.X.X.X"
+
+$ echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb
+2019-08-10 01:59:10.460 +0900: Embulk v0.9.17
+Switch to inspect mode.
+require 'bundler'; Bundler::VERSION
+"Y.Y.Y"
+```
+
+Install the same version of jruby (change X.X.X.X to the version shown above) and bundler:
+
+```
+$ rbenv install jruby-X.X.X.X
+$ rbenv local jruby-X.X.X.X
+$ gem install bundler -v Y.Y.Y
+```
+
+Install dependencies (NOTE: use the bundler included in the embulk.jar; otherwise `gem 'embulk'` is not found):
+
 ```
-$ cp -a $(which embulk) embulk.jar
+$ ./embulk.jar bundle install --path vendor/bundle
 ```
 
 Run tests with `env RUBYOPT="-r ./embulk.jar"`:
@@ -466,7 +500,7 @@ $ bundle exec env RUBYOPT="-r ./embulk.jar" ruby test/test_example.rb
 
 ### Release gem:
 
-Fix gemspec, then
+Change the version in the gemspec and update CHANGELOG.md. Then,
 
 ```
 $ bundle exec rake release
data/embulk-output-bigquery.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.4.12"
+  spec.version = "0.6.1"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -8,7 +8,9 @@ Gem::Specification.new do |spec|
   spec.licenses = ["MIT"]
   spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
 
-  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
+  # Exclude example directory which uses symlinks from generating gem.
+  # Symlinks do not work properly on the Windows platform without administrator privilege.
+  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] - Dir["example/*"]
   spec.test_files = spec.files.grep(%r{^(test|spec)/})
   spec.require_paths = ["lib"]