embulk-output-bigquery 0.4.13 → 0.6.2

Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +10 -6
  3. data/CHANGELOG.md +28 -0
  4. data/Gemfile +2 -0
  5. data/README.md +107 -75
  6. data/embulk-output-bigquery.gemspec +10 -3
  7. data/lib/embulk/output/bigquery.rb +31 -50
  8. data/lib/embulk/output/bigquery/auth.rb +35 -0
  9. data/lib/embulk/output/bigquery/bigquery_client.rb +24 -31
  10. data/lib/embulk/output/bigquery/google_client.rb +3 -34
  11. data/lib/embulk/output/bigquery/helper.rb +8 -4
  12. data/test/helper.rb +2 -1
  13. data/test/test_bigquery_client.rb +17 -21
  14. data/test/test_configure.rb +10 -19
  15. data/test/test_example.rb +5 -4
  16. data/test/test_transaction.rb +36 -76
  17. metadata +27 -49
  18. data/example/config_append_direct_schema_update_options.yml +0 -31
  19. data/example/config_client_options.yml +0 -33
  20. data/example/config_csv.yml +0 -30
  21. data/example/config_delete_in_advance.yml +0 -29
  22. data/example/config_delete_in_advance_partitioned_table.yml +0 -33
  23. data/example/config_expose_errors.yml +0 -30
  24. data/example/config_gcs.yml +0 -32
  25. data/example/config_guess_from_embulk_schema.yml +0 -29
  26. data/example/config_guess_with_column_options.yml +0 -40
  27. data/example/config_gzip.yml +0 -1
  28. data/example/config_jsonl.yml +0 -1
  29. data/example/config_max_threads.yml +0 -34
  30. data/example/config_min_ouput_tasks.yml +0 -34
  31. data/example/config_mode_append.yml +0 -30
  32. data/example/config_mode_append_direct.yml +0 -30
  33. data/example/config_nested_record.yml +0 -1
  34. data/example/config_payload_column.yml +0 -20
  35. data/example/config_payload_column_index.yml +0 -20
  36. data/example/config_prevent_duplicate_insert.yml +0 -30
  37. data/example/config_progress_log_interval.yml +0 -31
  38. data/example/config_replace.yml +0 -30
  39. data/example/config_replace_backup.yml +0 -32
  40. data/example/config_replace_backup_paritioned_table.yml +0 -34
  41. data/example/config_replace_paritioned_table.yml +0 -33
  42. data/example/config_replace_schema_update_options.yml +0 -33
  43. data/example/config_skip_file_generation.yml +0 -32
  44. data/example/config_table_strftime.yml +0 -30
  45. data/example/config_template_table.yml +0 -21
  46. data/example/config_uncompressed.yml +0 -1
  47. data/example/config_with_rehearsal.yml +0 -33
  48. data/example/example.csv +0 -17
  49. data/example/example.jsonl +0 -16
  50. data/example/example.yml +0 -1
  51. data/example/example2_1.csv +0 -1
  52. data/example/example2_2.csv +0 -1
  53. data/example/example4_1.csv +0 -1
  54. data/example/example4_2.csv +0 -1
  55. data/example/example4_3.csv +0 -1
  56. data/example/example4_4.csv +0 -1
  57. data/example/json_key.json +0 -12
  58. data/example/nested_example.jsonl +0 -16
  59. data/example/schema.json +0 -30
  60. data/example/schema_expose_errors.json +0 -30
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 4be15715120c9820fd039cd694a07a09826235d2
-  data.tar.gz: e12dc361afd90c250a19ec186d559fc7eeb6ee85
+  metadata.gz: 2168730943154d9fb8d8ebfce9e4a1c2130b16b5
+  data.tar.gz: 8c4549b91f75d3e7a874f310e0df791bd9c28030
 SHA512:
-  metadata.gz: 755ed9ddae0b079c84e57f49b5c852138fe4b441c2b49e0ad5ab6343ff6a785356078a8955600a34143716da1c758519d8903a841cb7eec2a989b3eeaa6e51fb
-  data.tar.gz: e659fddc3c350df529845659e28d75a0c856e69ff35d2c78a99035c926f163d01d48b3e1d11ccdf7f46942aef616dac910c58c48c49f56ccb3b2338259c6d61d
+  metadata.gz: ae7e67855daddce745e3d9a8c5ee659d99c7e84510365bf216f63fb7ed92b9ce166cb2b005b76680020f67354a6f97cb3596d9b15490630cf1b927968e5b8f0e
+  data.tar.gz: 83531a34355dbf3af0ec602db9318ad52e0b6b9ba84b5ea1bef15f4a50b67fa69b664e0bf62deffe8f37ac62d9b995a6c5a135b69e5cf365df06d97f32264cfa
data/.travis.yml CHANGED
@@ -1,17 +1,21 @@
 language: ruby
 matrix:
   include:
-    - env: EMBULK_VERSION=0.8.39
-      rvm: jruby-9.1.5.0 # bundled jruby version
-      jdk: openjdk7 # embulk 0.8.x uses jdk7
     - env: EMBULK_VERSION=0.9.15
-      rvm: jruby-9.1.5.0 # bundled jruby version
+      rvm: jruby-9.1.15.0 # bundled jruby version
       jdk: openjdk8 # embulk 0.9.x uses jdk8
     - env: EMBULK_VERSION=latest
-      rvm: jruby-9.1.5.0 # ?
+      rvm: jruby-9.1.15.0 # ?
       jdk: openjdk8 # ?
   allow_failures:
     - env: EMBULK_VERSION=latest
 before_install:
   - curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-${EMBULK_VERSION}.jar"
-script: bundle exec env RUBYOPT="-r ./embulk.jar" rake test
+  - chmod a+x embulk.jar
+  - BUNDLER_VERSION=$(echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb | tail -n 2 | tr -d '"')
+  - gem uninstall bundler -x
+  - gem install bundler -v ${BUNDLER_VERSION}
+install:
+  - ./embulk.jar bundle install --jobs=3 --retry=3 --path vendor/bundle
+script:
+  - bundle exec env RUBYOPT="-r ./embulk.jar -r embulk -r embulk/java/bootstrap" rake test
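For readability, here are the new CI steps from the added lines above, assembled as plain YAML. This is only a re-rendering of what the diff adds, with indentation reconstructed:

```yaml
before_install:
  - curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-${EMBULK_VERSION}.jar"
  - chmod a+x embulk.jar
  # Ask embulk.jar's embedded JRuby which Bundler version it bundles
  - BUNDLER_VERSION=$(echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb | tail -n 2 | tr -d '"')
  - gem uninstall bundler -x
  - gem install bundler -v ${BUNDLER_VERSION}
install:
  # Use embulk's own bundler so gems resolve against the embedded JRuby
  - ./embulk.jar bundle install --jobs=3 --retry=3 --path vendor/bundle
script:
  - bundle exec env RUBYOPT="-r ./embulk.jar -r embulk -r embulk/java/bootstrap" rake test
```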
data/CHANGELOG.md CHANGED
@@ -1,3 +1,31 @@
+## 0.6.2 - 2019-10-16
+
+* [maintenance] Lock the signet and google-api-client versions (thanks to @hiroyuki-sato)
+
+## 0.6.1 - 2019-08-28
+
+* [maintenance] Release a new gem that does not include symlinks, so that it works on Windows.
+
+## 0.6.0 - 2019-08-11
+
+Cleanup of `auth_method`:
+
+* [enhancement] Support `auth_method: authorized_user` (OAuth)
+* [incompatibility change] Rename `auth_method: json_key` to `auth_method: service_account` (`json_key` is kept for backward compatibility)
+* [incompatibility change] Remove the deprecated `auth_method: private_key` (p12 key)
+* [incompatibility change] Change the default `auth_method` from `private_key` to `application_default`, because `private_key` was dropped
+
+## 0.5.0 - 2019-08-10
+
+* [incompatibility change] Drop the deprecated `time_partitioning.require_partition_filter`
+* [incompatibility change] Drop `prevent_duplicate_insert`, which no longer has a use case
+* [incompatibility change] The `replace`, `replace_backup`, `append`, and `delete_in_advance` modes now require `auto_create_table: true`, because these modes previously created the target table even with `auto_create_table: false`, which confused users. Note that `auto_create_table: true` is required even for a partition (a table name with a partition decorator), which may not actually need table creation; this keeps the logic and implementation simple.
+* [incompatibility change] Change the default value of `auto_create_table` to `true`, because all of the above modes, that is, every mode except `append_direct`, now require it
+
+## 0.4.14 - 2019-08-10
+
+* [enhancement] Support field partitioning correctly.
+
 ## 0.4.13 - 2019-03-20

 * [enhancement] Support clustered table as an experimental feature
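Taken together, the 0.5.0 and 0.6.0 entries change what a working config looks like. Below is a hedged migration sketch, not an authoritative recipe; the keyfile path and the project, dataset, and table names are placeholders borrowed from the README examples:

```yaml
out:
  type: bigquery
  mode: replace
  # 0.6.0: auth_method: private_key was removed; json_key was renamed.
  auth_method: service_account      # the old name json_key is still accepted
  json_keyfile: /path/to/json_keyfile.json
  project: your-project-000
  dataset: your_dataset_name
  table: your_table_name
  # 0.5.0: auto_create_table now defaults to true, and replace mode requires it,
  # so it no longer needs to be set explicitly.
  # prevent_duplicate_insert was dropped in 0.5.0; delete it from old configs.
```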
data/Gemfile CHANGED
@@ -1,6 +1,8 @@
 source 'https://rubygems.org/'

 gemspec
+gem 'embulk'
+gem 'liquid', '= 4.0.0' # the version included in embulk.jar
 gem 'embulk-parser-none'
 gem 'embulk-parser-jsonl'
 gem 'pry-nav'
data/README.md CHANGED
@@ -23,34 +23,23 @@ https://developers.google.com/bigquery/loading-data-into-bigquery
 The current version of this plugin supports the Google API with Service Account Authentication, but does not support
 the OAuth flow for installed applications.

-### INCOMPATIBILITY CHANGES
-
-v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGELOG.md) for details.
-
-* The `formatter` option (formatter plugin support) is dropped. Use the `source_format` option instead (it already exists in v0.2.x too).
-* The `encoders` option (encoder plugin support) is dropped. Use the `compression` option instead (it already exists in v0.2.x too).
-* `mode: append` now expresses a transactional append, and `mode: append_direct` is the one that is not transactional.
-
 ## Configuration

 #### Original options

 | name | type | required? | default | description |
 |:---|:---|:---|:---|:---|
-| mode | string | optional | "append" | See [Mode](#mode) |
-| auth_method | string | optional | "private_key" | `private_key`, `json_key`, or `compute_engine` |
-| service_account_email | string | required when auth_method is private_key | | Your Google service account email |
-| p12_keyfile | string | required when auth_method is private_key | | Full path of the private key in P12 (PKCS12) format |
-| json_keyfile | string | required when auth_method is json_key | | Full path of the json key |
-| project | string | required if json_keyfile is not given | | project_id |
+| mode | string | optional | "append" | See [Mode](#mode) |
+| auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
+| json_keyfile | string | optional | | keyfile path or `content` |
+| project | string | required unless service\_account's `json_keyfile` is given | | project\_id |
 | dataset | string | required | | dataset |
 | location | string | optional | nil | geographic location of the dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929` |
 | auto_create_dataset | boolean | optional | false | automatically create the dataset |
-| auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
+| auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes require `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
 | schema_file | string | optional | | /path/to/schema.json |
 | template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
-| prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
 | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
 | job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
 | is_skip_job_result_check | boolean | optional | false | Skip waiting for the Load job to finish. Available for append or delete_in_advance mode |
@@ -107,8 +96,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 | time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
 | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
 | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
-| time_partitioning.require_partition_filter | boolean | optional | nil | If true, a valid partition filter is required when querying |
-| clustering | hash | optional | nil | (Experimental) Currently, clustering is supported for partitioned tables, so it must be used with the `time_partitioning` option. NOTE: **clustered tables** is a beta release. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
+| clustering | hash | optional | nil | Currently, clustering is supported for partitioned tables, so it must be used with the `time_partitioning` option. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
 | clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
 | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION`, `ALLOW_FIELD_RELAXATION`, or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` jobs; that is, it is not effective for most modes such as `append`, `replace`, and `replace_backup`. `delete_in_advance` deletes the origin table, so it does not need to update the schema. Only `append_direct` can utilize schema updates. |

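Because `clustering` must be combined with `time_partitioning`, a minimal hedged sketch may help; `category` and `user_id` are hypothetical column names, not options defined by the plugin:

```yaml
out:
  type: bigquery
  mode: replace
  table: table_name
  time_partitioning:
    type: DAY
    field: timestamp    # a top-level DATE or TIMESTAMP column
  clustering:
    fields:             # sort order follows this column order
      - category
      - user_id
```

Similarly, since the note above says only `append_direct` can utilize schema updates, a hedged sketch of `schema_update_options`:

```yaml
out:
  type: bigquery
  mode: append_direct
  table: table_name
  schema_update_options:
    - ALLOW_FIELD_ADDITION
    - ALLOW_FIELD_RELAXATION
```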
@@ -118,9 +106,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 out:
   type: bigquery
   mode: append
-  auth_method: private_key # default
-  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
-  p12_keyfile: /path/to/p12_keyfile.p12
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
   project: your-project-000
   dataset: your_dataset_name
   table: your_table_name
@@ -128,7 +115,7 @@ out:
   source_format: NEWLINE_DELIMITED_JSON
 ```

-### location
+### Location

 The geographic location of the dataset. Required except for US and EU.

@@ -136,7 +123,7 @@ GCS bucket should be in same region when you use `gcs_bucket`.

 See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)

-### mode
+### Mode

 5 modes are provided.

@@ -158,6 +145,8 @@ This is not transactional, i.e., if fails, the target table could have some rows

 ```is_skip_job_result_check``` must be false in replace mode

+NOTE: BigQuery does not support atomically replacing (actually, copying into) a non-partitioned table with a partitioned table. You must delete the non-partitioned table first; otherwise, you get an `Incompatible table partitioning specification when copying to the column partitioned table` error.
+
 ##### replace_backup

 1. Load to temporary table (Create and WRITE_APPEND in parallel)
@@ -173,53 +162,69 @@ This is not transactional, i.e., if fails, the target table could have some rows

 ### Authentication

-There are three supported methods to fetch an access token for the service account:
+There are four authentication methods:

-1. Public-Private key pair of GCP (Google Cloud Platform)'s service account
-2. JSON key of GCP (Google Cloud Platform)'s service account
-3. Pre-defined access token (Google Compute Engine only)
+1. `service_account` (or `json_key` for backward compatibility)
+1. `authorized_user`
+1. `compute_engine`
+1. `application_default`

-#### Public-Private key pair of GCP's service account
+#### service\_account (or json\_key)

-You first need to create a service account (client ID),
-download its private key, and deploy the key with embulk.
+Use GCP service account credentials.
+You first need to create a service account, download its json key, and deploy the key with embulk.

 ```yaml
 out:
   type: bigquery
-  auth_method: private_key # default
-  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
-  p12_keyfile: /path/to/p12_keyfile.p12
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
 ```

-#### JSON key of GCP's service account
+You can also embed the contents of `json_keyfile` in config.yml.

-You first need to create a service account (client ID),
-download its json key, and deploy the key with embulk.
+```yaml
+out:
+  type: bigquery
+  auth_method: service_account
+  json_keyfile:
+    content: |
+      {
+        "private_key_id": "123456789",
+        "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
+        "client_email": "..."
+      }
+```
+
+#### authorized\_user
+
+Use Google user credentials.
+You can get your credentials at `~/.config/gcloud/application_default_credentials.json` by running `gcloud auth login`.

 ```yaml
 out:
   type: bigquery
-  auth_method: json_key
-  json_keyfile: /path/to/json_keyfile.json
+  auth_method: authorized_user
+  json_keyfile: /path/to/credentials.json
 ```

-You can also embed the contents of json_keyfile in config.yml.
+You can also embed the contents of `json_keyfile` in config.yml.

 ```yaml
 out:
   type: bigquery
-  auth_method: json_key
+  auth_method: authorized_user
   json_keyfile:
     content: |
       {
-        "private_key_id": "123456789",
-        "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
-        "client_email": "..."
-      }
+        "client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
+        "client_secret":"xxxxxxxxxxx",
+        "refresh_token":"xxxxxxxxxxx",
+        "type":"authorized_user"
+      }
 ```

-#### Pre-defined access token (GCE only)
+#### compute\_engine

 On the other hand, you don't need to explicitly create a service account for embulk when you
 run embulk in Google Compute Engine. In this third authentication method, you need to
@@ -232,6 +237,22 @@ out:
   auth_method: compute_engine
 ```

+#### application\_default
+
+Use Application Default Credentials (ADC). ADC is a strategy to locate Google Cloud service account credentials:
+
+1. ADC checks whether the environment variable `GOOGLE_APPLICATION_CREDENTIALS` is set. If it is, ADC uses the service account file that the variable points to.
+2. ADC checks whether `~/.config/gcloud/application_default_credentials.json` exists. This file is created by running `gcloud auth application-default login`.
+3. ADC uses the default service account for credentials if the application is running on Compute Engine, App Engine, Kubernetes Engine, Cloud Functions, or Cloud Run.
+
+See https://cloud.google.com/docs/authentication/production for details.
+
+```yaml
+out:
+  type: bigquery
+  auth_method: application_default
+```
+
 ### Table id formatting

 The `table` option accepts [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
@@ -240,20 +261,16 @@ Table ids are formatted at runtime
 using the local time of the embulk server.

 For example, with the configuration below,
-data is inserted into tables `table_2015_04`, `table_2015_05` and so on.
+data is inserted into tables `table_20150503`, `table_20150504` and so on.

 ```yaml
 out:
   type: bigquery
-  table: table_%Y_%m
+  table: table_%Y%m%d
 ```

 ### Dynamic table creating

-When `auto_create_table` is set to true, try to create the table using BigQuery API.
-
-If table already exists, insert into it.
-
 There are 3 ways to set schema.

 #### Set schema.json
@@ -264,7 +281,7 @@ Please set file path of schema.json.
 out:
   type: bigquery
   auto_create_table: true
-  table: table_%Y_%m
+  table: table_%Y%m%d
   schema_file: /path/to/schema.json
 ```

@@ -276,7 +293,7 @@ Plugin will try to read schema from existing table and use it as schema template
 out:
   type: bigquery
   auto_create_table: true
-  table: table_%Y_%m
+  table: table_%Y%m%d
   template_table: existing_table_name
 ```

@@ -352,25 +369,9 @@ out:
   payload_column_index: 0 # or, payload_column: payload
 ```

-### Prevent Duplication
-
-The `prevent_duplicate_insert` option is used to prevent inserting the same data in `append` or `append_direct` modes.
-
-When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generates the job ID from the md5 hash of the file and other options:
-
-`job ID = md5(md5(file) + dataset + table + schema + source_format + file_delimiter + max_bad_records + encoding + ignore_unknown_values + allow_quoted_newlines)`
-
-[A job ID must be unique (including failures)](https://cloud.google.com/bigquery/loading-data-into-bigquery#consistency), so the same data cannot be inserted repeatedly with the same settings.
-
-```yaml
-out:
-  type: bigquery
-  prevent_duplicate_insert: true
-```
-
 ### GCS Bucket

-This is useful to reduce the number of consumed jobs, which is limited by [50,000 jobs per project per day](https://cloud.google.com/bigquery/quota-policy#import).
+This is useful to reduce the number of consumed jobs, which is limited by [100,000 jobs per project per day](https://cloud.google.com/bigquery/quotas#load_jobs).

 This plugin originally loads local files into BigQuery in parallel; that is, it consumes a number of jobs, say 24 jobs on a 24-CPU-core machine (this depends on embulk parameters such as `min_output_tasks` and `max_threads`).

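As a rough sketch of what this section describes (the bucket name is a placeholder; `gcs_bucket` and `auto_create_gcs_bucket` are the plugin options documented for this feature):

```yaml
out:
  type: bigquery
  gcs_bucket: bucket_name          # stage load files in this GCS bucket
  auto_create_gcs_bucket: true     # create the bucket if it does not exist
```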
@@ -398,32 +399,31 @@ To load into a partition, specify `table` parameter with a partition decorator a
 out:
   type: bigquery
   table: table_name$20160929
-  auto_create_table: true
 ```

-You may configure the `time_partitioning` parameter together to create the table via the `auto_create_table: true` option as:
+You may configure the `time_partitioning` parameter together as:

 ```yaml
 out:
   type: bigquery
   table: table_name$20160929
-  auto_create_table: true
   time_partitioning:
     type: DAY
     expiration_ms: 259200000
 ```

 You can also create a column-based partitioned table as:
+
 ```yaml
 out:
   type: bigquery
   mode: replace
-  auto_create_table: true
   table: table_name
   time_partitioning:
     type: DAY
     field: timestamp
 ```
+
 Note that `time_partitioning.field` should be a top-level `DATE` or `TIMESTAMP` column.

 Use the [Tables: patch](https://cloud.google.com/bigquery/docs/reference/v2/tables/patch) API to update the schema of a partitioned table; embulk-output-bigquery itself does not support it, though.
@@ -448,8 +448,40 @@ $ embulk run -X page_size=1 -b . -l trace example/example.yml

 Place your embulk with `.jar` extension:

+```
+$ curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-latest.jar"
+$ chmod a+x embulk.jar
+```
+
+Investigate the JRUBY\_VERSION and Bundler::VERSION included in the embulk.jar:
+
+```
+$ echo JRUBY_VERSION | ./embulk.jar irb
+2019-08-10 00:59:11.866 +0900: Embulk v0.9.17
+Switch to inspect mode.
+JRUBY_VERSION
+"X.X.X.X"
+
+$ echo "require 'bundler'; Bundler::VERSION" | ./embulk.jar irb
+2019-08-10 01:59:10.460 +0900: Embulk v0.9.17
+Switch to inspect mode.
+require 'bundler'; Bundler::VERSION
+"Y.Y.Y"
+```
+
+Install the same version of jruby (change X.X.X.X to the version shown above) and bundler:
+
+```
+$ rbenv install jruby-X.X.X.X
+$ rbenv local jruby-X.X.X.X
+$ gem install bundler -v Y.Y.Y
+```
+
+Install dependencies (NOTE: use the bundler included in the embulk.jar; otherwise, `gem 'embulk'` is not found):
+
 ```
-$ cp -a $(which embulk) embulk.jar
+$ ./embulk.jar bundle install --path vendor/bundle
 ```

 Run tests with `env RUBYOPT="-r ./embulk.jar"`:
data/embulk-output-bigquery.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.4.13"
+  spec.version = "0.6.2"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -8,11 +8,18 @@ Gem::Specification.new do |spec|
   spec.licenses = ["MIT"]
   spec.homepage = "https://github.com/embulk/embulk-output-bigquery"

-  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
+  # Exclude the example directory, which uses symlinks, from the generated gem.
+  # Symlinks do not work properly on the Windows platform without administrator privilege.
+  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] - Dir["example/*"]
   spec.test_files = spec.files.grep(%r{^(test|spec)/})
   spec.require_paths = ["lib"]

-  spec.add_dependency 'google-api-client'
+  # TODO
+  # signet 0.12.0 and google-api-client 0.33.0 require Ruby >= 2.4.
+  # Embulk 0.9 uses JRuby 9.1.X.Y, which is compatible with Ruby 2.3.
+  # So, force signet < 0.12 and google-api-client < 0.33.0.
+  spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
+  spec.add_dependency 'google-api-client', '< 0.33.0'
   spec.add_dependency 'time_with_zone'

   spec.add_development_dependency 'bundler', ['>= 1.10.6']