embulk-output-bigquery 0.4.13 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +10 -6
  3. data/CHANGELOG.md +28 -0
  4. data/Gemfile +2 -0
  5. data/README.md +107 -75
  6. data/embulk-output-bigquery.gemspec +10 -3
  7. data/lib/embulk/output/bigquery.rb +31 -50
  8. data/lib/embulk/output/bigquery/auth.rb +35 -0
  9. data/lib/embulk/output/bigquery/bigquery_client.rb +24 -31
  10. data/lib/embulk/output/bigquery/google_client.rb +3 -34
  11. data/lib/embulk/output/bigquery/helper.rb +8 -4
  12. data/test/helper.rb +2 -1
  13. data/test/test_bigquery_client.rb +17 -21
  14. data/test/test_configure.rb +10 -19
  15. data/test/test_example.rb +5 -4
  16. data/test/test_transaction.rb +36 -76
  17. metadata +27 -49
  18. data/example/config_append_direct_schema_update_options.yml +0 -31
  19. data/example/config_client_options.yml +0 -33
  20. data/example/config_csv.yml +0 -30
  21. data/example/config_delete_in_advance.yml +0 -29
  22. data/example/config_delete_in_advance_partitioned_table.yml +0 -33
  23. data/example/config_expose_errors.yml +0 -30
  24. data/example/config_gcs.yml +0 -32
  25. data/example/config_guess_from_embulk_schema.yml +0 -29
  26. data/example/config_guess_with_column_options.yml +0 -40
  27. data/example/config_gzip.yml +0 -1
  28. data/example/config_jsonl.yml +0 -1
  29. data/example/config_max_threads.yml +0 -34
  30. data/example/config_min_ouput_tasks.yml +0 -34
  31. data/example/config_mode_append.yml +0 -30
  32. data/example/config_mode_append_direct.yml +0 -30
  33. data/example/config_nested_record.yml +0 -1
  34. data/example/config_payload_column.yml +0 -20
  35. data/example/config_payload_column_index.yml +0 -20
  36. data/example/config_prevent_duplicate_insert.yml +0 -30
  37. data/example/config_progress_log_interval.yml +0 -31
  38. data/example/config_replace.yml +0 -30
  39. data/example/config_replace_backup.yml +0 -32
  40. data/example/config_replace_backup_paritioned_table.yml +0 -34
  41. data/example/config_replace_paritioned_table.yml +0 -33
  42. data/example/config_replace_schema_update_options.yml +0 -33
  43. data/example/config_skip_file_generation.yml +0 -32
  44. data/example/config_table_strftime.yml +0 -30
  45. data/example/config_template_table.yml +0 -21
  46. data/example/config_uncompressed.yml +0 -1
  47. data/example/config_with_rehearsal.yml +0 -33
  48. data/example/example.csv +0 -17
  49. data/example/example.jsonl +0 -16
  50. data/example/example.yml +0 -1
  51. data/example/example2_1.csv +0 -1
  52. data/example/example2_2.csv +0 -1
  53. data/example/example4_1.csv +0 -1
  54. data/example/example4_2.csv +0 -1
  55. data/example/example4_3.csv +0 -1
  56. data/example/example4_4.csv +0 -1
  57. data/example/json_key.json +0 -12
  58. data/example/nested_example.jsonl +0 -16
  59. data/example/schema.json +0 -30
  60. data/example/schema_expose_errors.json +0 -30
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4be15715120c9820fd039cd694a07a09826235d2
4
- data.tar.gz: e12dc361afd90c250a19ec186d559fc7eeb6ee85
3
+ metadata.gz: 2168730943154d9fb8d8ebfce9e4a1c2130b16b5
4
+ data.tar.gz: 8c4549b91f75d3e7a874f310e0df791bd9c28030
5
5
  SHA512:
6
- metadata.gz: 755ed9ddae0b079c84e57f49b5c852138fe4b441c2b49e0ad5ab6343ff6a785356078a8955600a34143716da1c758519d8903a841cb7eec2a989b3eeaa6e51fb
7
- data.tar.gz: e659fddc3c350df529845659e28d75a0c856e69ff35d2c78a99035c926f163d01d48b3e1d11ccdf7f46942aef616dac910c58c48c49f56ccb3b2338259c6d61d
6
+ metadata.gz: ae7e67855daddce745e3d9a8c5ee659d99c7e84510365bf216f63fb7ed92b9ce166cb2b005b76680020f67354a6f97cb3596d9b15490630cf1b927968e5b8f0e
7
+ data.tar.gz: 83531a34355dbf3af0ec602db9318ad52e0b6b9ba84b5ea1bef15f4a50b67fa69b664e0bf62deffe8f37ac62d9b995a6c5a135b69e5cf365df06d97f32264cfa
data/.travis.yml CHANGED
@@ -1,17 +1,21 @@
1
1
  language: ruby
2
2
  matrix:
3
3
  include:
4
- - env: EMBULK_VERSION=0.8.39
5
- rvm: jruby-9.1.5.0 # bundled jruby version
6
- jdk: openjdk7 # embulk 0.8.x uses jdk7
7
4
  - env: EMBULK_VERSION=0.9.15
8
- rvm: jruby-9.1.5.0 # bundled jruby version
5
+ rvm: jruby-9.1.15.0 # bundled jruby version
9
6
  jdk: openjdk8 # embulk 0.9.x uses jdk8
10
7
  - env: EMBULK_VERSION=latest
11
- rvm: jruby-9.1.5.0 # ?
8
+ rvm: jruby-9.1.15.0 # ?
12
9
  jdk: openjdk8 # ?
13
10
  allow_failures:
14
11
  - env: EMBULK_VERSION=latest
15
12
  before_install:
16
13
  - curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-${EMBULK_VERSION}.jar"
17
- script: bundle exec env RUBYOPT="-r ./embulk.jar" rake test
14
+ - chmod a+x embulk.jar
15
+ - BUNDLER_VERSION=$(echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb | tail -n 2 | tr -d '"')
16
+ - gem uninstall bundler -x
17
+ - gem install bundler -v ${BUNDLER_VERSION}
18
+ install:
19
+ - ./embulk.jar bundle install --jobs=3 --retry=3 --path vendor/bundle
20
+ script:
21
+ - bundle exec env RUBYOPT="-r ./embulk.jar -r embulk -r embulk/java/bootstrap" rake test
data/CHANGELOG.md CHANGED
@@ -1,3 +1,31 @@
1
+ ## 0.6.2 - 2019-10-16
2
+
3
+ * [maintenance] Lock signet and google-api-client version (thanks to @hiroyuki-sato)
4
+
5
+ ## 0.6.1 - 2019-08-28
6
+
7
+ * [maintenance] Release a new gem not to include symlinks to make it work on Windows.
8
+
9
+ ## 0.6.0 - 2019-08-11
10
+
11
+ Cleanup `auth_method`:
12
+
13
+ * [enhancement] Support `auth_method: authorized_user` (OAuth)
14
+ * [incompatibility change] Rename `auth_method: json_key` to `auth_method: service_account` (`json_key` is kept for backward compatibility)
15
+ * [incompatibility change] Remove deprecated `auth_method: private_key` (p12 key)
16
+ * [incompatibility change] Change the default `auth_method` to `application_default` from `private_key` because `private_key` was dropped.
17
+
18
+ ## 0.5.0 - 2019-08-10
19
+
20
+ * [incompatibility change] Drop deprecated `time_partitioning`.`require_partition_filter`
21
+ * [incompatibility change] Drop `prevent_duplicate_insert` which has no use-case now
22
+ * [incompatibility change] Modes `replace`, `replace_backup`, `append`, and `delete_in_advance` require `auto_create_table: true` now because, previously, these modes had created a target table even with `auto_create_table: false` and made users being confused. Note that `auto_create_table: true` is always required even for a partition (a table name with a partition decorator) which may not require creating a table. This is for simplicity of logics and implementations.
23
+ * [incompatibility change] Change default value of `auto_create_table` to `true` because the above 4 modes, that is, except `append_direct` always require `auto_create_table: true` now.
24
+
25
+ ## 0.4.14 - 2019-08-10
26
+
27
+ * [enhancement] Support field partitioning correctly.
28
+
1
29
  ## 0.4.13 - 2019-03-20
2
30
 
3
31
  * [enhancement] Support clustered table as an experimental feature
data/Gemfile CHANGED
@@ -1,6 +1,8 @@
1
1
  source 'https://rubygems.org/'
2
2
 
3
3
  gemspec
4
+ gem 'embulk'
5
+ gem 'liquid', '= 4.0.0' # the version included in embulk.jar
4
6
  gem 'embulk-parser-none'
5
7
  gem 'embulk-parser-jsonl'
6
8
  gem 'pry-nav'
data/README.md CHANGED
@@ -23,34 +23,23 @@ https://developers.google.com/bigquery/loading-data-into-bigquery
23
23
  Current version of this plugin supports Google API with Service Account Authentication, but does not support
24
24
  OAuth flow for installed applications.
25
25
 
26
- ### INCOMPATIBILITY CHANGES
27
-
28
- v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGELOG.md) for details.
29
-
30
- * `formatter` option (formatter plugin support) is dropped. Use `source_format` option instead. (it already exists in v0.2.x too)
31
- * `encoders` option (encoder plugin support) is dropped. Use `compression` option instead (it already exists in v0.2.x too).
32
- * `mode: append` mode now expresses a transactional append, and `mode: append_direct` is one which is not transactional.
33
-
34
26
  ## Configuration
35
27
 
36
28
  #### Original options
37
29
 
38
30
  | name | type | required? | default | description |
39
31
  |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
40
- | mode | string | optional | "append" | See [Mode](#mode) |
41
- | auth_method | string | optional | "private_key" | `private_key` , `json_key` or `compute_engine`
42
- | service_account_email | string | required when auth_method is private_key | | Your Google service account email
43
- | p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
44
- | json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
45
- | project | string | required if json_keyfile is not given | | project_id |
32
+ | mode | string | optional | "append" | See [Mode](#mode) |
33
+ | auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
34
+ | json_keyfile | string | optional | | keyfile path or `content` |
35
+ | project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
46
36
  | dataset | string | required | | dataset |
47
37
  | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
48
38
  | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
49
39
  | auto_create_dataset | boolean | optional | false | automatically create dataset |
50
- | auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
40
+ | auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes require `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
51
41
  | schema_file | string | optional | | /path/to/schema.json |
52
42
  | template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
53
- | prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
54
43
  | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
55
44
  | job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
56
45
  | is_skip_job_result_check | boolean | optional | false | Skip waiting Load job finishes. Available for append, or delete_in_advance mode |
@@ -107,8 +96,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
107
96
  | time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
108
97
  | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
109
98
  | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
110
- | time_partitioning.require_partition_filter | boolean | optional | nil | If true, valid partition filter is required when query |
111
- | clustering | hash | optional | nil | (Experimental) Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. NOTE: **clustered tables** is a beta release. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
99
+ | clustering | hash | optional | nil | Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
112
100
  | clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
113
101
  | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
114
102
 
@@ -118,9 +106,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
118
106
  out:
119
107
  type: bigquery
120
108
  mode: append
121
- auth_method: private_key # default
122
- service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
123
- p12_keyfile: /path/to/p12_keyfile.p12
109
+ auth_method: service_account
110
+ json_keyfile: /path/to/json_keyfile.json
124
111
  project: your-project-000
125
112
  dataset: your_dataset_name
126
113
  table: your_table_name
@@ -128,7 +115,7 @@ out:
128
115
  source_format: NEWLINE_DELIMITED_JSON
129
116
  ```
130
117
 
131
- ### location
118
+ ### Location
132
119
 
133
120
  The geographic location of the dataset. Required except for US and EU.
134
121
 
@@ -136,7 +123,7 @@ GCS bucket should be in same region when you use `gcs_bucket`.
136
123
 
137
124
  See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
138
125
 
139
- ### mode
126
+ ### Mode
140
127
 
141
128
  5 modes are provided.
142
129
 
@@ -158,6 +145,8 @@ This is not transactional, i.e., if fails, the target table could have some rows
158
145
 
159
146
  ```is_skip_job_result_check``` must be false when replace mode
160
147
 
148
+ NOTE: BigQuery does not support replacing (actually, copying into) a non-partitioned table with a partitioned table atomically. You must delete the non-partitioned table first; otherwise, you get `Incompatible table partitioning specification when copying to the column partitioned table` error.
149
+
161
150
  ##### replace_backup
162
151
 
163
152
  1. Load to temporary table (Create and WRITE_APPEND in parallel)
@@ -173,53 +162,69 @@ This is not transactional, i.e., if fails, the target table could have some rows
173
162
 
174
163
  ### Authentication
175
164
 
176
- There are three methods supported to fetch access token for the service account.
165
+ There are four authentication methods
177
166
 
178
- 1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
179
- 2. JSON key of GCP(Google Cloud Platform)'s service account
180
- 3. Pre-defined access token (Google Compute Engine only)
167
+ 1. `service_account` (or `json_key` for backward compatibility)
168
+ 1. `authorized_user`
169
+ 1. `compute_engine`
170
+ 1. `application_default`
181
171
 
182
- #### Public-Private key pair of GCP's service account
172
+ #### service\_account (or json\_key)
183
173
 
184
- You first need to create a service account (client ID),
185
- download its private key and deploy the key with embulk.
174
+ Use GCP service account credentials.
175
+ You first need to create a service account, download its json key and deploy the key with embulk.
186
176
 
187
177
  ```yaml
188
178
  out:
189
179
  type: bigquery
190
- auth_method: private_key # default
191
- service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
192
- p12_keyfile: /path/to/p12_keyfile.p12
180
+ auth_method: service_account
181
+ json_keyfile: /path/to/json_keyfile.json
193
182
  ```
194
183
 
195
- #### JSON key of GCP's service account
184
+ You can also embed contents of `json_keyfile` at config.yml.
196
185
 
197
- You first need to create a service account (client ID),
198
- download its json key and deploy the key with embulk.
186
+ ```yaml
187
+ out:
188
+ type: bigquery
189
+ auth_method: service_account
190
+ json_keyfile:
191
+ content: |
192
+ {
193
+ "private_key_id": "123456789",
194
+ "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
195
+ "client_email": "..."
196
+ }
197
+ ```
198
+
199
+ #### authorized\_user
200
+
201
+ Use Google user credentials.
202
+ You can get your credentials at `~/.config/gcloud/application_default_credentials.json` by running `gcloud auth login`.
199
203
 
200
204
  ```yaml
201
205
  out:
202
206
  type: bigquery
203
- auth_method: json_key
204
- json_keyfile: /path/to/json_keyfile.json
207
+ auth_method: authorized_user
208
+ json_keyfile: /path/to/credentials.json
205
209
  ```
206
210
 
207
- You can also embed contents of json_keyfile at config.yml.
211
+ You can also embed contents of `json_keyfile` at config.yml.
208
212
 
209
213
  ```yaml
210
214
  out:
211
215
  type: bigquery
212
- auth_method: json_key
216
+ auth_method: authorized_user
213
217
  json_keyfile:
214
218
  content: |
215
219
  {
216
- "private_key_id": "123456789",
217
- "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
218
- "client_email": "..."
219
- }
220
+ "client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
221
+ "client_secret":"xxxxxxxxxxx",
222
+ "refresh_token":"xxxxxxxxxxx",
223
+ "type":"authorized_user"
224
+ }
220
225
  ```
221
226
 
222
- #### Pre-defined access token(GCE only)
227
+ #### compute\_engine
223
228
 
224
229
  On the other hand, you don't need to explicitly create a service account for embulk when you
225
230
  run embulk in Google Compute Engine. In this third authentication method, you need to
@@ -232,6 +237,22 @@ out:
232
237
  auth_method: compute_engine
233
238
  ```
234
239
 
240
+ #### application\_default
241
+
242
+ Use Application Default Credentials (ADC). ADC is a strategy to locate Google Cloud Service Account credentials.
243
+
244
+ 1. ADC checks to see if the environment variable `GOOGLE_APPLICATION_CREDENTIALS` is set. If the variable is set, ADC uses the service account file that the variable points to.
245
+ 2. ADC checks to see if `~/.config/gcloud/application_default_credentials.json` is located. This file is created by running `gcloud auth application-default login`.
246
+ 3. Use the default service account for credentials if the application is running on Compute Engine, App Engine, Kubernetes Engine, Cloud Functions or Cloud Run.
247
+
248
+ See https://cloud.google.com/docs/authentication/production for details.
249
+
250
+ ```yaml
251
+ out:
252
+ type: bigquery
253
+ auth_method: application_default
254
+ ```
255
+
235
256
  ### Table id formatting
236
257
 
237
258
  `table` and option accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
@@ -240,20 +261,16 @@ Table ids are formatted at runtime
240
261
  using the local time of the embulk server.
241
262
 
242
263
  For example, with the configuration below,
243
- data is inserted into tables `table_2015_04`, `table_2015_05` and so on.
264
+ data is inserted into tables `table_20150503`, `table_20150504` and so on.
244
265
 
245
266
  ```yaml
246
267
  out:
247
268
  type: bigquery
248
- table: table_%Y_%m
269
+ table: table_%Y%m%d
249
270
  ```
250
271
 
251
272
  ### Dynamic table creating
252
273
 
253
- When `auto_create_table` is set to true, try to create the table using BigQuery API.
254
-
255
- If table already exists, insert into it.
256
-
257
274
  There are 3 ways to set schema.
258
275
 
259
276
  #### Set schema.json
@@ -264,7 +281,7 @@ Please set file path of schema.json.
264
281
  out:
265
282
  type: bigquery
266
283
  auto_create_table: true
267
- table: table_%Y_%m
284
+ table: table_%Y%m%d
268
285
  schema_file: /path/to/schema.json
269
286
  ```
270
287
 
@@ -276,7 +293,7 @@ Plugin will try to read schema from existing table and use it as schema template
276
293
  out:
277
294
  type: bigquery
278
295
  auto_create_table: true
279
- table: table_%Y_%m
296
+ table: table_%Y%m%d
280
297
  template_table: existing_table_name
281
298
  ```
282
299
 
@@ -352,25 +369,9 @@ out:
352
369
  payload_column_index: 0 # or, payload_column: payload
353
370
  ```
354
371
 
355
- ### Prevent Duplication
356
-
357
- `prevent_duplicate_insert` option is used to prevent inserting same data for modes `append` or `append_direct`.
358
-
359
- When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generate job ID from md5 hash of file and other options.
360
-
361
- `job ID = md5(md5(file) + dataset + table + schema + source_format + file_delimiter + max_bad_records + encoding + ignore_unknown_values + allow_quoted_newlines)`
362
-
363
- [job ID must be unique(including failures)](https://cloud.google.com/bigquery/loading-data-into-bigquery#consistency) so that same data can't be inserted with same settings repeatedly.
364
-
365
- ```yaml
366
- out:
367
- type: bigquery
368
- prevent_duplicate_insert: true
369
- ```
370
-
371
372
  ### GCS Bucket
372
373
 
373
- This is useful to reduce number of consumed jobs, which is limited by [50,000 jobs per project per day](https://cloud.google.com/bigquery/quota-policy#import).
374
+ This is useful to reduce number of consumed jobs, which is limited by [100,000 jobs per project per day](https://cloud.google.com/bigquery/quotas#load_jobs).
374
375
 
375
376
  This plugin originally loads local files into BigQuery in parallel, that is, consumes a number of jobs, say 24 jobs on 24 CPU core machine for example (this depends on embulk parameters such as `min_output_tasks` and `max_threads`).
376
377
 
@@ -398,32 +399,31 @@ To load into a partition, specify `table` parameter with a partition decorator a
398
399
  out:
399
400
  type: bigquery
400
401
  table: table_name$20160929
401
- auto_create_table: true
402
402
  ```
403
403
 
404
- You may configure `time_partitioning` parameter together to create table via `auto_create_table: true` option as:
404
+ You may configure `time_partitioning` parameter together as:
405
405
 
406
406
  ```yaml
407
407
  out:
408
408
  type: bigquery
409
409
  table: table_name$20160929
410
- auto_create_table: true
411
410
  time_partitioning:
412
411
  type: DAY
413
412
  expiration_ms: 259200000
414
413
  ```
415
414
 
416
415
  You can also create column-based partitioning table as:
416
+
417
417
  ```yaml
418
418
  out:
419
419
  type: bigquery
420
420
  mode: replace
421
- auto_create_table: true
422
421
  table: table_name
423
422
  time_partitioning:
424
423
  type: DAY
425
424
  field: timestamp
426
425
  ```
426
+
427
427
  Note the `time_partitioning.field` should be top-level `DATE` or `TIMESTAMP`.
428
428
 
429
429
  Use [Tables: patch](https://cloud.google.com/bigquery/docs/reference/v2/tables/patch) API to update the schema of the partitioned table, embulk-output-bigquery itself does not support it, though.
@@ -448,8 +448,40 @@ $ embulk run -X page_size=1 -b . -l trace example/example.yml
448
448
 
449
449
  Place your embulk with `.jar` extension:
450
450
 
451
+
452
+ ```
453
+ $ curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-latest.jar"
454
+ $ chmod a+x embulk.jar
455
+ ```
456
+
457
+ Investigate JRUBY\_VERSION and Bundler::VERSION included in the embulk.jar:
458
+
459
+ ```
460
+ $ echo JRUBY_VERSION | ./embulk.jar irb
461
+ 2019-08-10 00:59:11.866 +0900: Embulk v0.9.17
462
+ Switch to inspect mode.
463
+ JRUBY_VERSION
464
+ "X.X.X.X"
465
+
466
+ $ echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb
467
+ 2019-08-10 01:59:10.460 +0900: Embulk v0.9.17
468
+ Switch to inspect mode.
469
+ require 'bundler'; Bundler::VERSION
470
+ "Y.Y.Y"
471
+ ```
472
+
473
+ Install the same version of jruby (change X.X.X.X to the version shown above) and bundler:
474
+
475
+ ```
476
+ $ rbenv install jruby-X.X.X.X
477
+ $ rbenv local jruby-X.X.X.X
478
+ $ gem install bundler -v Y.Y.Y
479
+ ```
480
+
481
+ Install dependencies (NOTE: Use bundler included in the embulk.jar, otherwise, `gem 'embulk'` is not found):
482
+
451
483
  ```
452
- $ cp -a $(which embulk) embulk.jar
484
+ $ ./embulk.jar bundle install --path vendor/bundle
453
485
  ```
454
486
 
455
487
  Run tests with `env RUBYOPT="-r ./embulk.jar`:
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-bigquery"
3
- spec.version = "0.4.13"
3
+ spec.version = "0.6.2"
4
4
  spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
5
5
  spec.summary = "Google BigQuery output plugin for Embulk"
6
6
  spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -8,11 +8,18 @@ Gem::Specification.new do |spec|
8
8
  spec.licenses = ["MIT"]
9
9
  spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
10
10
 
11
- spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
11
+ # Exclude example directory which uses symlinks from generating gem.
12
+ # Symlinks do not work properly on the Windows platform without administrator privilege.
13
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] - Dir["example/*" ]
12
14
  spec.test_files = spec.files.grep(%r{^(test|spec)/})
13
15
  spec.require_paths = ["lib"]
14
16
 
15
- spec.add_dependency 'google-api-client'
17
+ # TODO
18
+ # signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
19
+ # Embulk 0.9 uses JRuby 9.1.X.Y, which is compatible with Ruby 2.3.
20
+ # So, Force install signet < 0.12 and google-api-client < 0.33.0
21
+ spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
22
+ spec.add_dependency 'google-api-client','< 0.33.0'
16
23
  spec.add_dependency 'time_with_zone'
17
24
 
18
25
  spec.add_development_dependency 'bundler', ['>= 1.10.6']