embulk-output-bigquery 0.2.3 → 0.3.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -12
  3. data/CHANGELOG.md +18 -0
  4. data/Gemfile +8 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.md +165 -39
  7. data/Rakefile +11 -0
  8. data/embulk-output-bigquery.gemspec +20 -0
  9. data/example/config_client_options.yml +33 -0
  10. data/example/config_csv.yml +30 -0
  11. data/example/config_delete_in_advance.yml +29 -0
  12. data/example/config_expose_errors.yml +30 -0
  13. data/example/config_guess_from_embulk_schema.yml +29 -0
  14. data/example/config_guess_with_column_options.yml +40 -0
  15. data/example/config_gzip.yml +30 -0
  16. data/example/config_jsonl.yml +30 -0
  17. data/example/config_mode_append.yml +30 -0
  18. data/example/config_mode_append_direct.yml +30 -0
  19. data/example/config_payload_column.yml +20 -0
  20. data/example/config_payload_column_index.yml +20 -0
  21. data/example/config_prevent_duplicate_insert.yml +30 -0
  22. data/example/config_replace.yml +30 -0
  23. data/example/config_replace_backup.yml +32 -0
  24. data/example/config_skip_file_generation.yml +32 -0
  25. data/example/config_table_strftime.yml +30 -0
  26. data/example/config_template_table.yml +21 -0
  27. data/example/config_uncompressed.yml +30 -0
  28. data/example/config_with_rehearsal.yml +32 -0
  29. data/example/example.csv +17 -0
  30. data/example/example.jsonl +16 -0
  31. data/example/example.yml +30 -0
  32. data/example/json_key.json +12 -0
  33. data/example/nested_example.jsonl +16 -0
  34. data/example/schema.json +30 -0
  35. data/example/schema_expose_errors.json +30 -0
  36. data/lib/embulk/output/bigquery.rb +388 -3
  37. data/lib/embulk/output/bigquery/bigquery_client.rb +396 -0
  38. data/lib/embulk/output/bigquery/file_writer.rb +103 -0
  39. data/lib/embulk/output/bigquery/helper.rb +78 -0
  40. data/lib/embulk/output/bigquery/value_converter_factory.rb +292 -0
  41. data/test/helper.rb +13 -0
  42. data/test/test_bigquery_client.rb +166 -0
  43. data/test/test_configure.rb +254 -0
  44. data/test/test_example.rb +34 -0
  45. data/test/test_file_writer.rb +129 -0
  46. data/test/test_helper.rb +103 -0
  47. data/test/test_transaction.rb +129 -0
  48. data/test/test_value_converter_factory.rb +316 -0
  49. metadata +114 -45
  50. data/build.gradle +0 -80
  51. data/config/checkstyle/checkstyle.xml +0 -128
  52. data/config/checkstyle/default.xml +0 -108
  53. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  54. data/gradle/wrapper/gradle-wrapper.properties +0 -6
  55. data/gradlew +0 -164
  56. data/gradlew.bat +0 -90
  57. data/settings.gradle +0 -2
  58. data/src/main/java/org/embulk/output/BigqueryAuthentication.java +0 -117
  59. data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +0 -508
  60. data/src/main/java/org/embulk/output/BigqueryWriter.java +0 -575
  61. data/src/test/java/org/embulk/output/TestBigqueryAuthentication.java +0 -5
  62. data/src/test/java/org/embulk/output/TestBigqueryOutputPlugin.java +0 -5
  63. data/src/test/java/org/embulk/output/TestBigqueryWriter.java +0 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 6e992d7d8add2b7ef5634d0fd4e41a4e90233e1e
- data.tar.gz: 1f87d54b14a5b5e7a52d3581d03a6a76b6d9b0f2
+ metadata.gz: 0907e6f02a9b05ea6a75e18d457cb641eede5973
+ data.tar.gz: f8119467b434636fc6f6696c36f2075eeb82e795
  SHA512:
- metadata.gz: 01ed587d80ba5c60be6a0e0ebaa4e9c9b3f576ab6151f384ff4887034f4c2e3f0a7c490a7685689ad8a79035844fd2c48130f6bb81e22fcf9e75cad0c5e9deea
- data.tar.gz: c8f728989565537ddeffde30b9ee5f8798e70f25e44739e90d0ae2e34ddeca6b3e45d131d538e91da90c430a4ae9ece87623d10c7bbd817184ad5c47cc8d80df
+ metadata.gz: 4b7313ca411cc3bb1fd064554ac706219e00fb358a445e2978e918c417aacc96ee972d3d49d30e9c1ebdb286066af4cf18305846fdf4dab06a1ced91249af2dc
+ data.tar.gz: 2f9e84d736de70a35369b4a003d8c1848a1433da902843d57b89a8b5ff5eb64bb945fceb828af2f565b87c271b9537dc433d53a6a1024c4ea014f74842abc0e1
data/.gitignore CHANGED
@@ -1,14 +1,8 @@
- target/
- build/
- pkg/
- *.iml
  *~
- ._*
- .idea
- tmp/
+ /pkg/
+ /tmp/
+ /.bundle/
+ /Gemfile.lock
  vendor/
- /classpath/
- /.bundle
- .yardoc
- /embulk-*.jar
- /.gradle
+ .ruby-version
+ .tags
data/CHANGELOG.md CHANGED
@@ -1,3 +1,21 @@
+ ## 0.3.0 - YYYY-MM-DD
+
+ A big change is introduced: embulk-output-bigquery is now written in JRuby.
+
+ * [new feature] Support parallel loads. Fix [#28](https://github.com/embulk/embulk-output-bigquery/issues/28).
+ * [new feature] Create table first. Fix [#29](https://github.com/embulk/embulk-output-bigquery/issues/29).
+ * [new feature] Introduce rehearsal mode. Fix [#30](https://github.com/embulk/embulk-output-bigquery/issues/30).
+ * [new feature] Support `dataset_old` option for `replace_backup`. Fix [#31](https://github.com/embulk/embulk-output-bigquery/issues/31).
+ * [maintenance] Fix default timestamp format to `%Y-%m-%d %H:%M:%S.%6N`. Fix [#32](https://github.com/embulk/embulk-output-bigquery/issues/32).
+ * [new feature] Support request options such as `timeout_sec`, `open_timeout_sec`, `retries`. Fix [#33](https://github.com/embulk/embulk-output-bigquery/issues/33).
+ * [new feature] Support continuing from file generation with the `skip_file_generation` option (see the sketch after this section).
+ * [new feature] Guess BigQuery schema from Embulk schema. Fix [#1](https://github.com/embulk/embulk-output-bigquery/issues/1).
+ * [new feature] Support automatically creating the dataset.
+ * [new feature] Support a transactional append mode.
+ * [incompatibility change] Formatter plugin support is dropped. Formatting is done in this plugin according to the specified `source_format`.
+ * [incompatibility change] Encoder plugin support is dropped. Encoding is done in this plugin according to the specified `compression`.
+ * [incompatibility change] `append` mode is now a transactional append, and `append_direct` is the non-transactional variant (this was the `append` mode before).
+
  ## 0.2.3 - 2016-02-19
 
  * [maintenance] Fix detect logic of delete_in_advance mode. [#26](https://github.com/embulk/embulk-output-bigquery/issues/26). @sonots thanks!
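As noted in the `skip_file_generation` item above, a load can resume from local files generated by a previous run. A minimal sketch of such a config, using only option names documented in the README portion of this diff; the paths and names are illustrative, not taken from the package:

```yaml
out:
  type: bigquery
  mode: append
  auth_method: json_key
  json_keyfile: /tmp/your-project-000.json  # illustrative path
  dataset: your_dataset_name
  table: your_table_name
  path_prefix: /tmp/prefix_   # must match the run that generated the files
  file_ext: .jsonl.gz
  compression: GZIP
  source_format: NEWLINE_DELIMITED_JSON
  skip_file_generation: true  # load the already-generated local files
```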
data/Gemfile ADDED
@@ -0,0 +1,8 @@
+ source 'https://rubygems.org/'
+
+ gemspec
+ gem 'embulk-parser-none'
+ gem 'embulk-parser-jsonl'
+ gem 'pry-nav'
+ gem 'test-unit'
+ gem 'test-unit-rr'
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -1,4 +1,3 @@
-
  # embulk-output-bigquery
 
  [Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/) using [direct insert](https://cloud.google.com/bigquery/loading-data-into-bigquery#loaddatapostrequest)
@@ -26,27 +25,53 @@ OAuth flow for installed applications.
 
  #### Original options
 
- | name | type | required? | default | description |
- |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
- | mode | string | optional | "append" | [See below](#mode) |
- | auth_method | string | optional | "private_key" | `private_key` , `json_key` or `compute_engine`
- | service_account_email | string | required when auth_method is private_key | | Your Google service account email
- | p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
- | json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
- | sequence_format | string | optional | %03d.%02d | |
- | file_ext | string | optional | | e.g. ".csv.gz" ".json.gz" |
- | project | string | required | | project_id |
- | dataset | string | required | | dataset |
- | table | string | required | | table name |
- | auto_create_table | boolean | optional | 0 | [See below](#dynamic-table-creating) |
- | schema_file | string | optional | | /path/to/schema.json |
- | template_table | string | optional | | existing_table_name [See below](#dynamic-table-creating) |
- | prevent_duplicate_insert | boolean | optional | 0 | [See below](#data-consistency) |
- | delete_from_local_when_job_end | boolean | optional | 0 | If set to true, delete local file when job is end |
- | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
- | job_status_max_polling_time | int | optional | 10 sec | Job status polling interval |
- | is_skip_job_result_check | boolean | optional | 0 | |
- | application_name | string | optional | "Embulk BigQuery plugin" | Anything you like |
+ | name | type | required? | default | description |
+ |:--------------------------|:------------|:-----------|:-------------------------|:-----------------------|
+ | mode | string | optional | "append" | [See below](#mode) |
+ | auth_method | string | optional | "private_key" | `private_key`, `json_key` or `compute_engine` |
+ | service_account_email | string | required when auth_method is private_key | | Your Google service account email |
+ | p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
+ | json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
+ | project | string | required if json_keyfile is not given | | project_id |
+ | dataset | string | required | | dataset |
+ | table | string | required | | table name |
+ | auto_create_dataset | boolean | optional | false | automatically create the dataset |
+ | auto_create_table | boolean | optional | false | [See below](#dynamic-table-creating) |
+ | schema_file | string | optional | | /path/to/schema.json |
+ | template_table | string | optional | | template table name [See below](#dynamic-table-creating) |
+ | prevent_duplicate_insert | boolean | optional | false | [See below](#data-consistency) |
+ | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
+ | job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
+ | is_skip_job_result_check | boolean | optional | false | Skip waiting until the load job finishes. Available for append or delete_in_advance mode |
+ | with_rehearsal | boolean | optional | false | Load `rehearsal_counts` records as a rehearsal. The rehearsal loads into a temporary REHEARSAL table, which is deleted at the end. You may use this option to catch data errors as early as possible |
+ | rehearsal_counts | integer | optional | 1000 | Number of records to load in a rehearsal |
+ | column_options | hash | optional | | [See below](#column-options) |
+ | default_timezone | string | optional | UTC | |
+ | default_timestamp_format | string | optional | %Y-%m-%d %H:%M:%S.%6N | |
+ | payload_column | string | optional | nil | [See below](#formatter-performance-issue) |
+ | payload_column_index | integer | optional | nil | [See below](#formatter-performance-issue) |
+
+ Client or request options:
+
+ | name | type | required? | default | description |
+ |:--------------------------|:------------|:-----------|:-------------------------|:-----------------------|
+ | timeout_sec | integer | optional | 300 | Seconds to wait for one block to be read |
+ | open_timeout_sec | integer | optional | 300 | Seconds to wait for the connection to open |
+ | retries | integer | optional | 5 | Number of retries |
+ | application_name | string | optional | "Embulk BigQuery plugin" | User-Agent |
+
+ Options for intermediate local files (see the sketch below these tables):
+
+ | name | type | required? | default | description |
+ |:--------------------------|:------------|:-----------|:-------------------------|:-----------------------|
+ | path_prefix | string | optional | | Path prefix of local files such as "/tmp/prefix_". By default, randomly generated with [tempfile](http://ruby-doc.org/stdlib-2.2.3/libdoc/tempfile/rdoc/Tempfile.html) |
+ | sequence_format | string | optional | .%d.%03d | Sequence format for pid, task index |
+ | file_ext | string | optional | | File extension of local files such as ".csv.gz" ".json.gz". By default, automatically generated from `source_format` and `compression` |
+ | skip_file_generation | boolean | optional | | Load already-generated local files into BigQuery if available. Specify the correct path_prefix and file_ext. |
+ | delete_from_local_when_job_end | boolean | optional | false | If set to true, delete local files when the job ends |
+ | compression | string | optional | "NONE" | Compression of local files (`GZIP` or `NONE`) |
+
+ `source_format` is also used to determine the formatter (csv or jsonl).
 
  #### Same options of bq command-line tools or BigQuery job's property
 
@@ -54,7 +79,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 
  | name | type | required? | default | description |
  |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
- | source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
+ | source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
  | max_bad_records | int | optional | 0 | |
  | field_delimiter | char | optional | "," | |
  | encoding | string | optional | "UTF-8" | `UTF-8` or `ISO-8859-1` |
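As referenced above, a minimal sketch combining the intermediate-local-file options with the load options; a sketch only, built from option names documented in the tables above, with illustrative values:

```yaml
out:
  type: bigquery
  mode: replace
  auth_method: json_key
  json_keyfile: /tmp/your-project-000.json  # illustrative path
  project: your-project-000
  dataset: your_dataset_name
  table: your_table_name
  # intermediate local files
  path_prefix: /tmp/prefix_  # default: randomly generated with tempfile
  file_ext: .jsonl.gz        # default: derived from source_format and compression
  compression: GZIP
  source_format: NEWLINE_DELIMITED_JSON
  max_bad_records: 10        # one of the bq-style load options above
```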
@@ -70,26 +95,26 @@ out:
   auth_method: private_key # default
   service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
   p12_keyfile: /path/to/p12_keyfile.p12
- path_prefix: /path/to/output
- file_ext: csv.gz
- source_format: CSV
   project: your-project-000
   dataset: your_dataset_name
   table: your_table_name
- formatter:
-   type: csv
-   header_line: false
- encoders:
- - {type: gzip}
+ compression: GZIP
+ source_format: NEWLINE_DELIMITED_JSON
  ```
 
  ### mode
 
- 4 modes are provided.
+ 5 modes are provided.
 
  ##### append
 
- default. When append mode, plugin will insert data into existing table.
+ 1. Load to temporary table.
+ 2. Copy temporary table to destination table. (WRITE_APPEND)
+
+ ##### append_direct
+
+ Insert data into the existing table directly.
+ This is not transactional, i.e., if it fails, the target table may be left with some rows inserted.
 
  ##### replace
 
@@ -101,7 +126,7 @@ default. When append mode, plugin will insert data into existing table.
  ##### replace_backup
 
  1. Load to temporary table.
- 2. Copy destination table to backup table. (table_name_old)
+ 2. Copy destination table to backup table. (dataset_old, table_old)
  3. Copy temporary table to destination table. (WRITE_TRUNCATE)
 
  ```is_skip_job_result_check``` must be false in replace_backup mode (see the sketch below).
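A minimal `replace_backup` sketch based on the steps above; the mode names and the `dataset_old`/`table_old` options appear in this README, while the project, dataset, and table names are illustrative:

```yaml
out:
  type: bigquery
  mode: replace_backup   # other modes: append, append_direct, replace, delete_in_advance
  auth_method: json_key
  json_keyfile: /tmp/your-project-000.json
  dataset: your_dataset_name
  table: your_table_name
  dataset_old: your_backup_dataset_name  # where the current table contents are backed up
  table_old: your_table_name_old
  is_skip_job_result_check: false        # must be false in replace_backup mode
```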
@@ -111,8 +136,6 @@ default. When append mode, plugin will insert data into existing table.
  1. Delete destination table, if it exists.
  2. Load to destination table.
 
- ```auto_create_table``` must be true when delete_in_advance mode.
-
  ### Authentication
 
  There are three methods supported to fetch access token for the service account.
@@ -196,7 +219,7 @@ When `auto_create_table` is set to true, try to create the table using BigQuery
 
  If table already exists, insert into it.
 
- There are 2 ways to set schema.
+ There are 3 ways to set schema.
 
  #### Set schema.json
 
@@ -222,6 +245,78 @@ out:
   template_table: existing_table_name
  ```
 
+ #### Guess from Embulk Schema
+
+ The plugin will try to guess the BigQuery schema from the Embulk schema. It is also configurable with `column_options`. See [Column Options](#column-options).
+
+ ### Column Options
+
+ Column options are used to aid guessing the BigQuery schema, or to define conversion of values:
+
+ - **column_options**: advanced: an array of options for columns
+   - **name**: column name
+   - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, and `RECORD`. See below for supported conversion types.
+     - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
+     - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
+     - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
+     - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `RECORD` (default: `STRING`)
+     - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `TIMESTAMP`)
+     - json: `STRING`, `RECORD` (default: `STRING`)
+   - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
+   - **fields**: Describes the nested schema fields if the type property is set to RECORD. Note that this is **required** for `RECORD` columns.
+   - **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
+   - **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`)
+ - **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
+ - **default_timezone**: default timezone for column_options (string, default is "UTC")
+
+ Example:
+
+ ```yaml
+ out:
+   type: bigquery
+   auto_create_table: true
+   column_options:
+     - {name: date, type: STRING, timestamp_format: "%Y-%m-%d", timezone: "Asia/Tokyo"}
+     - name: json_column
+       type: RECORD
+       fields:
+         - {name: key1, type: STRING}
+         - {name: key2, type: STRING}
+ ```
+
+ NOTE: Type conversion is done in this JRuby plugin, and could be slow. See [Formatter Performance Issue](#formatter-performance-issue) for how to improve performance.
+
+ ### Formatter Performance Issue
+
+ embulk-output-bigquery supports formatting records into CSV or JSON (and also formatting timestamp columns).
+ However, this plugin is written in JRuby, and JRuby plugins are generally slower than Java plugins.
+
+ Therefore, it is recommended to format records with filter plugins written in Java such as [embulk-filter-to_json](https://github.com/civitaspo/embulk-filter-to_json) as:
+
+ ```
+ filters:
+   - type: to_json
+     column: {name: payload, type: string}
+     default_format: "%Y-%m-%d %H:%M:%S.%6N"
+ out:
+   type: bigquery
+   payload_column_index: 0 # or, payload_column: payload
+ ```
+
+ Furthermore, if your files are originally jsonl or csv files, you can even skip a parser with [embulk-parser-none](https://github.com/sonots/embulk-parser-none) as:
+
+ ```
+ in:
+   type: file
+   path_prefix: example/example.jsonl
+   parser:
+     type: none
+     column_name: payload
+ out:
+   type: bigquery
+   payload_column_index: 0 # or, payload_column: payload
+ ```
+
  ### Data Consistency
 
  When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generates a job ID from the md5 hash of the file and other options to prevent duplicate data insertion.
@@ -238,8 +333,39 @@ out:
   prevent_duplicate_insert: true
  ```
 
- ## Build
+ ## Development
+
+ ### Run example:
+
+ Prepare a json\_keyfile at /tmp/your-project-000.json, then
 
  ```
- $ ./gradlew gem
+ $ embulk bundle install --path vendor/bundle
+ $ embulk run -X page_size=1 -b . -l trace example/example.yml
+ ```
+
+ ### Run test:
+
  ```
+ $ bundle exec rake test
+ ```
+
+ To run tests which actually connect to BigQuery, such as test/test\_bigquery\_client.rb,
+ prepare a json\_keyfile at /tmp/your-project-000.json, then
+
+ ```
+ $ CONNECT=1 bundle exec ruby test/test_bigquery_client.rb
+ $ CONNECT=1 bundle exec ruby test/test_example.rb
+ ```
+
+ ### Release gem:
+
+ Fix the gemspec, then
+
+ ```
+ $ bundle exec rake release
+ ```
+
+ ## ChangeLog
+
+ [CHANGELOG.md](CHANGELOG.md)
data/Rakefile ADDED
@@ -0,0 +1,11 @@
+ require "bundler/gem_tasks"
+ require 'rake/testtask'
+
+ desc 'Run test_unit based test'
+ Rake::TestTask.new(:test) do |t|
+   t.libs << "test"
+   t.test_files = Dir["test/**/test_*.rb"].sort
+   t.verbose = true
+   #t.warning = true
+ end
+ task :default => :test
data/embulk-output-bigquery.gemspec ADDED
@@ -0,0 +1,20 @@
+ Gem::Specification.new do |spec|
+   spec.name = "embulk-output-bigquery"
+   spec.version = "0.3.0.pre1"
+   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
+   spec.summary = "Google BigQuery output plugin for Embulk"
+   spec.description = "Embulk plugin that inserts records into Google BigQuery."
+   spec.email = ["satoshiakama@gmail.com", "sonots@gmail.com"]
+   spec.licenses = ["MIT"]
+   spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
+
+   spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
+   spec.test_files = spec.files.grep(%r{^(test|spec)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_dependency 'google-api-client'
+   spec.add_dependency "tzinfo"
+   spec.add_development_dependency 'embulk', ['>= 0.8.2']
+   spec.add_development_dependency 'bundler', ['>= 1.10.6']
+   spec.add_development_dependency 'rake', ['>= 10.0']
+ end
data/example/config_client_options.yml ADDED
@@ -0,0 +1,33 @@
+ in:
+   type: file
+   path_prefix: example/example.csv
+   parser:
+     type: csv
+     charset: UTF-8
+     newline: CRLF
+     null_string: 'NULL'
+     skip_header_lines: 1
+     comment_line_marker: '#'
+     columns:
+       - {name: date, type: string}
+       - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+       - {name: "null", type: string}
+       - {name: long, type: long}
+       - {name: string, type: string}
+       - {name: double, type: double}
+       - {name: boolean, type: boolean}
+ out:
+   type: bigquery
+   mode: replace
+   auth_method: json_key
+   json_keyfile: /tmp/your-project-000.json
+   dataset: your_dataset_name
+   table: your_table_name
+   source_format: NEWLINE_DELIMITED_JSON
+   auto_create_dataset: true
+   auto_create_table: true
+   schema_file: example/schema.json
+   timeout_sec: 400
+   open_timeout_sec: 400
+   retries: 2
+   application_name: "Embulk BigQuery plugin test"
data/example/config_csv.yml ADDED
@@ -0,0 +1,30 @@
+ in:
+   type: file
+   path_prefix: example/example.csv
+   parser:
+     type: csv
+     charset: UTF-8
+     newline: CRLF
+     null_string: 'NULL'
+     skip_header_lines: 1
+     comment_line_marker: '#'
+     columns:
+       - {name: date, type: string}
+       - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+       - {name: "null", type: string}
+       - {name: long, type: long}
+       - {name: string, type: string}
+       - {name: double, type: double}
+       - {name: boolean, type: boolean}
+ out:
+   type: bigquery
+   mode: replace
+   auth_method: json_key
+   json_keyfile: /tmp/your-project-000.json
+   dataset: your_dataset_name
+   table: your_table_name
+   source_format: CSV
+   compression: GZIP
+   auto_create_dataset: true
+   auto_create_table: true
+   schema_file: example/schema.json