embulk-output-bigquery 0.2.3 → 0.3.0.pre1

Files changed (63)
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -12
  3. data/CHANGELOG.md +18 -0
  4. data/Gemfile +8 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.md +165 -39
  7. data/Rakefile +11 -0
  8. data/embulk-output-bigquery.gemspec +20 -0
  9. data/example/config_client_options.yml +33 -0
  10. data/example/config_csv.yml +30 -0
  11. data/example/config_delete_in_advance.yml +29 -0
  12. data/example/config_expose_errors.yml +30 -0
  13. data/example/config_guess_from_embulk_schema.yml +29 -0
  14. data/example/config_guess_with_column_options.yml +40 -0
  15. data/example/config_gzip.yml +30 -0
  16. data/example/config_jsonl.yml +30 -0
  17. data/example/config_mode_append.yml +30 -0
  18. data/example/config_mode_append_direct.yml +30 -0
  19. data/example/config_payload_column.yml +20 -0
  20. data/example/config_payload_column_index.yml +20 -0
  21. data/example/config_prevent_duplicate_insert.yml +30 -0
  22. data/example/config_replace.yml +30 -0
  23. data/example/config_replace_backup.yml +32 -0
  24. data/example/config_skip_file_generation.yml +32 -0
  25. data/example/config_table_strftime.yml +30 -0
  26. data/example/config_template_table.yml +21 -0
  27. data/example/config_uncompressed.yml +30 -0
  28. data/example/config_with_rehearsal.yml +32 -0
  29. data/example/example.csv +17 -0
  30. data/example/example.jsonl +16 -0
  31. data/example/example.yml +30 -0
  32. data/example/json_key.json +12 -0
  33. data/example/nested_example.jsonl +16 -0
  34. data/example/schema.json +30 -0
  35. data/example/schema_expose_errors.json +30 -0
  36. data/lib/embulk/output/bigquery.rb +388 -3
  37. data/lib/embulk/output/bigquery/bigquery_client.rb +396 -0
  38. data/lib/embulk/output/bigquery/file_writer.rb +103 -0
  39. data/lib/embulk/output/bigquery/helper.rb +78 -0
  40. data/lib/embulk/output/bigquery/value_converter_factory.rb +292 -0
  41. data/test/helper.rb +13 -0
  42. data/test/test_bigquery_client.rb +166 -0
  43. data/test/test_configure.rb +254 -0
  44. data/test/test_example.rb +34 -0
  45. data/test/test_file_writer.rb +129 -0
  46. data/test/test_helper.rb +103 -0
  47. data/test/test_transaction.rb +129 -0
  48. data/test/test_value_converter_factory.rb +316 -0
  49. metadata +114 -45
  50. data/build.gradle +0 -80
  51. data/config/checkstyle/checkstyle.xml +0 -128
  52. data/config/checkstyle/default.xml +0 -108
  53. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  54. data/gradle/wrapper/gradle-wrapper.properties +0 -6
  55. data/gradlew +0 -164
  56. data/gradlew.bat +0 -90
  57. data/settings.gradle +0 -2
  58. data/src/main/java/org/embulk/output/BigqueryAuthentication.java +0 -117
  59. data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +0 -508
  60. data/src/main/java/org/embulk/output/BigqueryWriter.java +0 -575
  61. data/src/test/java/org/embulk/output/TestBigqueryAuthentication.java +0 -5
  62. data/src/test/java/org/embulk/output/TestBigqueryOutputPlugin.java +0 -5
  63. data/src/test/java/org/embulk/output/TestBigqueryWriter.java +0 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 6e992d7d8add2b7ef5634d0fd4e41a4e90233e1e
- data.tar.gz: 1f87d54b14a5b5e7a52d3581d03a6a76b6d9b0f2
+ metadata.gz: 0907e6f02a9b05ea6a75e18d457cb641eede5973
+ data.tar.gz: f8119467b434636fc6f6696c36f2075eeb82e795
  SHA512:
- metadata.gz: 01ed587d80ba5c60be6a0e0ebaa4e9c9b3f576ab6151f384ff4887034f4c2e3f0a7c490a7685689ad8a79035844fd2c48130f6bb81e22fcf9e75cad0c5e9deea
- data.tar.gz: c8f728989565537ddeffde30b9ee5f8798e70f25e44739e90d0ae2e34ddeca6b3e45d131d538e91da90c430a4ae9ece87623d10c7bbd817184ad5c47cc8d80df
+ metadata.gz: 4b7313ca411cc3bb1fd064554ac706219e00fb358a445e2978e918c417aacc96ee972d3d49d30e9c1ebdb286066af4cf18305846fdf4dab06a1ced91249af2dc
+ data.tar.gz: 2f9e84d736de70a35369b4a003d8c1848a1433da902843d57b89a8b5ff5eb64bb945fceb828af2f565b87c271b9537dc433d53a6a1024c4ea014f74842abc0e1
data/.gitignore CHANGED
@@ -1,14 +1,8 @@
- target/
- build/
- pkg/
- *.iml
  *~
- ._*
- .idea
- tmp/
+ /pkg/
+ /tmp/
+ /.bundle/
+ /Gemfile.lock
  vendor/
- /classpath/
- /.bundle
- .yardoc
- /embulk-*.jar
- /.gradle
+ .ruby-version
+ .tags
data/CHANGELOG.md CHANGED
@@ -1,3 +1,21 @@
+ ## 0.3.0 - YYYY-MM-DD
+
+ A big change is introduced: embulk-output-bigquery is now written in JRuby.
+
+ * [new feature] Support parallel loads. Fix [#28](https://github.com/embulk/embulk-output-bigquery/issues/28).
+ * [new feature] Create table first. Fix [#29](https://github.com/embulk/embulk-output-bigquery/issues/29).
+ * [new feature] Introduce rehearsal mode. Fix [#30](https://github.com/embulk/embulk-output-bigquery/issues/30).
+ * [new feature] Support `dataset_old` option for `replace_backup`. Fix [#31](https://github.com/embulk/embulk-output-bigquery/issues/31).
+ * [maintenance] Fix default timestamp format to `%Y-%m-%d %H:%M:%S.%6N`. Fix [#32](https://github.com/embulk/embulk-output-bigquery/issues/32).
+ * [new feature] Support request options such as `timeout_sec`, `open_timeout_sec`, and `retries`. Fix [#33](https://github.com/embulk/embulk-output-bigquery/issues/33).
+ * [new feature] Support continuing from file generation with the `skip_file_generation` option.
+ * [new feature] Guess the BigQuery schema from the Embulk schema. Fix [#1](https://github.com/embulk/embulk-output-bigquery/issues/1).
+ * [new feature] Support automatically creating the dataset.
+ * [new feature] Support a transactional append mode.
+ * [incompatibility change] Formatter plugin support is dropped. Formatting is now done in this plugin according to the specified `source_format`.
+ * [incompatibility change] Encoder plugin support is dropped. Encoding is now done in this plugin according to the specified `compression`.
+ * [incompatibility change] `append` mode now means a transactional append, and `append_direct` is the non-transactional append (this was `append` mode before).
+
  ## 0.2.3 - 2016-02-19

  * [maintenance] Fix detection logic of delete_in_advance mode. [#26](https://github.com/embulk/embulk-output-bigquery/issues/26). @sonots thanks!
data/Gemfile ADDED
@@ -0,0 +1,8 @@
+ source 'https://rubygems.org/'
+
+ gemspec
+ gem 'embulk-parser-none'
+ gem 'embulk-parser-jsonl'
+ gem 'pry-nav'
+ gem 'test-unit'
+ gem 'test-unit-rr'
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -1,4 +1,3 @@
-
  # embulk-output-bigquery

  [Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/) using [direct insert](https://cloud.google.com/bigquery/loading-data-into-bigquery#loaddatapostrequest)
@@ -26,27 +25,53 @@ OAuth flow for installed applications.

  #### Original options

- | name | type | required? | default | description |
- |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
- | mode | string | optional | "append" | [See below](#mode) |
- | auth_method | string | optional | "private_key" | `private_key`, `json_key` or `compute_engine` |
- | service_account_email | string | required when auth_method is private_key | | Your Google service account email |
- | p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
- | json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
- | sequence_format | string | optional | %03d.%02d | |
- | file_ext | string | optional | | e.g. ".csv.gz" ".json.gz" |
- | project | string | required | | project_id |
- | dataset | string | required | | dataset |
- | table | string | required | | table name |
- | auto_create_table | boolean | optional | 0 | [See below](#dynamic-table-creating) |
- | schema_file | string | optional | | /path/to/schema.json |
- | template_table | string | optional | | existing_table_name [See below](#dynamic-table-creating) |
- | prevent_duplicate_insert | boolean | optional | 0 | [See below](#data-consistency) |
- | delete_from_local_when_job_end | boolean | optional | 0 | If set to true, delete local file when job is end |
- | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
- | job_status_max_polling_time | int | optional | 10 sec | Job status polling interval |
- | is_skip_job_result_check | boolean | optional | 0 | |
- | application_name | string | optional | "Embulk BigQuery plugin" | Anything you like |
+ | name | type | required? | default | description |
+ |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
+ | mode | string | optional | "append" | [See below](#mode) |
+ | auth_method | string | optional | "private_key" | `private_key`, `json_key` or `compute_engine` |
+ | service_account_email | string | required when auth_method is private_key | | Your Google service account email |
+ | p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
+ | json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
+ | project | string | required if json_keyfile is not given | | project_id |
+ | dataset | string | required | | dataset |
+ | table | string | required | | table name |
+ | auto_create_dataset | boolean | optional | false | automatically create the dataset |
+ | auto_create_table | boolean | optional | false | [See below](#dynamic-table-creating) |
+ | schema_file | string | optional | | /path/to/schema.json |
+ | template_table | string | optional | | template table name [See below](#dynamic-table-creating) |
+ | prevent_duplicate_insert | boolean | optional | false | [See below](#data-consistency) |
+ | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
+ | job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
+ | is_skip_job_result_check | boolean | optional | false | Skip waiting until the load job finishes. Available for append or delete_in_advance mode |
+ | with_rehearsal | boolean | optional | false | Load `rehearsal_counts` records as a rehearsal. The rehearsal loads into a temporary REHEARSAL table, which is deleted afterwards. You may use this option to detect data errors as early as possible |
+ | rehearsal_counts | integer | optional | 1000 | Specify the number of records to load in a rehearsal |
+ | column_options | hash | optional | | [See below](#column-options) |
+ | default_timezone | string | optional | UTC | |
+ | default_timestamp_format | string | optional | %Y-%m-%d %H:%M:%S.%6N | |
+ | payload_column | string | optional | nil | [See below](#formatter-performance-issue) |
+ | payload_column_index | integer | optional | nil | [See below](#formatter-performance-issue) |
+
+ Client or request options:
+
+ | name | type | required? | default | description |
+ |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
+ | timeout_sec | integer | optional | 300 | Seconds to wait for one block to be read |
+ | open_timeout_sec | integer | optional | 300 | Seconds to wait for the connection to open |
+ | retries | integer | optional | 5 | Number of retries |
+ | application_name | string | optional | "Embulk BigQuery plugin" | User-Agent |
+
+ Options for intermediate local files:
+
+ | name | type | required? | default | description |
+ |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
+ | path_prefix | string | optional | | Path prefix of local files such as "/tmp/prefix_". By default, randomly generated with [tempfile](http://ruby-doc.org/stdlib-2.2.3/libdoc/tempfile/rdoc/Tempfile.html) |
+ | sequence_format | string | optional | .%d.%03d | Sequence format for pid and task index |
+ | file_ext | string | optional | | The file extension of local files such as ".csv.gz" ".json.gz". By default, generated automatically from `source_format` and `compression` |
+ | skip_file_generation | boolean | optional | | Load already-generated local files into BigQuery if available. Specify the correct path_prefix and file_ext. |
+ | delete_from_local_when_job_end | boolean | optional | false | If set to true, delete local files when the job ends |
+ | compression | string | optional | "NONE" | Compression of local files (`GZIP` or `NONE`) |
+
+ `source_format` is also used to determine the formatter (csv or jsonl).
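As a rough illustration of how these file options combine, here is a minimal Ruby sketch of intermediate-file naming; the helper name is hypothetical, not the plugin's API:

```ruby
# Hypothetical sketch: path_prefix, sequence_format (default ".%d.%03d",
# filled with the process pid and the task index), and file_ext are
# concatenated to form the intermediate local file path.
def intermediate_file_path(path_prefix, sequence_format, file_ext, task_index)
  sequence = sprintf(sequence_format, Process.pid, task_index)
  "#{path_prefix}#{sequence}#{file_ext}"
end

intermediate_file_path('/tmp/prefix_', '.%d.%03d', '.csv.gz', 0)
# => e.g. "/tmp/prefix_.1234.000.csv.gz"
```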
 
  #### Same options as bq command-line tools or BigQuery job's property
 
@@ -54,7 +79,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b

  | name | type | required? | default | description |
  |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
- | source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
+ | source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
  | max_bad_records | int | optional | 0 | |
  | field_delimiter | char | optional | "," | |
  | encoding | string | optional | "UTF-8" | `UTF-8` or `ISO-8859-1` |
@@ -70,26 +95,26 @@ out:
  auth_method: private_key # default
  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
  p12_keyfile: /path/to/p12_keyfile.p12
- path_prefix: /path/to/output
- file_ext: csv.gz
- source_format: CSV
  project: your-project-000
  dataset: your_dataset_name
  table: your_table_name
- formatter:
-   type: csv
-   header_line: false
- encoders:
- - {type: gzip}
+ compression: GZIP
+ source_format: NEWLINE_DELIMITED_JSON
  ```

  ### mode

- 4 modes are provided.
+ 5 modes are provided.

  ##### append

- default. When append mode, plugin will insert data into existing table.
+ 1. Load to temporary table.
+ 2. Copy temporary table to destination table. (WRITE_APPEND)
+
+ ##### append_direct
+
+ Insert data into the existing table directly.
+ This is not transactional, i.e., if it fails, the target table could have some rows inserted.

  ##### replace

@@ -101,7 +126,7 @@ default. When append mode, plugin will insert data into existing table.
  ##### replace_backup

  1. Load to temporary table.
- 2. Copy destination table to backup table. (table_name_old)
+ 2. Copy destination table to backup table. (dataset_old, table_old)
  3. Copy temporary table to destination table. (WRITE_TRUNCATE)

  ```is_skip_job_result_check``` must be false in replace_backup mode.
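A minimal config sketch of the backup options named above (all values are placeholders; `dataset_old` and `table_old` come from this release's changelog):

```yaml
out:
  type: bigquery
  mode: replace_backup
  dataset: your_dataset_name
  table: your_table_name
  dataset_old: your_backup_dataset_name
  table_old: your_table_name_old
```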
@@ -111,8 +136,6 @@ default. When append mode, plugin will insert data into existing table.
  1. Delete destination table, if it exists.
  2. Load to destination table.

- ```auto_create_table``` must be true when delete_in_advance mode.
-
  ### Authentication

  There are three methods supported to fetch an access token for the service account.
@@ -196,7 +219,7 @@ When `auto_create_table` is set to true, try to create the table using BigQuery

  If table already exists, insert into it.

- There are 2 ways to set schema.
+ There are 3 ways to set schema.

  #### Set schema.json

@@ -222,6 +245,78 @@ out:
  template_table: existing_table_name
  ```

+ #### Guess from Embulk Schema
+
+ The plugin will try to guess the BigQuery schema from the Embulk schema; see the sketch below. It is also configurable with `column_options`. See [Column Options](#column-options).
+
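A minimal Ruby sketch of the guessing idea, following the default conversion types listed under Column Options below (illustrative only; the plugin's internal helper may differ):

```ruby
# Hypothetical sketch: default Embulk-to-BigQuery type mapping for schema guessing.
EMBULK_TO_BIGQUERY = {
  boolean:   'BOOLEAN',
  long:      'INTEGER',
  double:    'FLOAT',
  string:    'STRING',
  timestamp: 'TIMESTAMP',
  json:      'STRING',
}

# Build one BigQuery schema field from an Embulk column definition.
# NULLABLE is the default mode, as documented below.
def guess_field(name:, type:)
  { 'name' => name, 'type' => EMBULK_TO_BIGQUERY.fetch(type), 'mode' => 'NULLABLE' }
end

guess_field(name: 'id', type: :long)
# => {"name"=>"id", "type"=>"INTEGER", "mode"=>"NULLABLE"}
```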
+ ### Column Options
+
+ Column options are used to aid guessing the BigQuery schema, or to define conversion of values:
+
+ - **column_options**: advanced: an array of options for columns
+   - **name**: column name
+   - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, and `RECORD`. See below for supported conversion types.
+     - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
+     - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
+     - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
+     - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `RECORD` (default: `STRING`)
+     - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `TIMESTAMP`)
+     - json: `STRING`, `RECORD` (default: `STRING`)
+   - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
+   - **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for a `RECORD` column.
+   - **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
+   - **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`)
+ - **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
+ - **default_timezone**: default timezone for column_options (string, default is "UTC")
+
+ Example)
+
+ ```yaml
+ out:
+   type: bigquery
+   auto_create_table: true
+   column_options:
+     - {name: date, type: STRING, timestamp_format: "%Y-%m-%d", timezone: "Asia/Tokyo"}
+     - name: json_column
+       type: RECORD
+       fields:
+         - {name: key1, type: STRING}
+         - {name: key2, type: STRING}
+ ```
+
+ NOTE: Type conversion is done in this jruby plugin, and could be slow. See [Formatter Performance Issue](#formatter-performance-issue) to improve the performance.
+
+ ### Formatter Performance Issue
+
+ embulk-output-bigquery supports formatting records into CSV or JSON (and also formatting timestamp columns).
+ However, this plugin is written in jruby, and jruby plugins are generally slower than java plugins.
+
+ Therefore, it is recommended to format records with filter plugins written in Java such as [embulk-filter-to_json](https://github.com/civitaspo/embulk-filter-to_json) as:
+
+ ```
+ filters:
+   - type: to_json
+     column: {name: payload, type: string}
+     default_format: "%Y-%m-%d %H:%M:%S.%6N"
+ out:
+   type: bigquery
+   payload_column_index: 0 # or, payload_column: payload
+ ```
+
+ Furthermore, if your files are originally jsonl or csv files, you can even skip a parser with [embulk-parser-none](https://github.com/sonots/embulk-parser-none) as:
+
+ ```
+ in:
+   type: file
+   path_prefix: example/example.jsonl
+   parser:
+     type: none
+     column_name: payload
+ out:
+   type: bigquery
+   payload_column_index: 0 # or, payload_column: payload
+ ```
+
+
  ### Data Consistency

  When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generates a job ID from the md5 hash of the file and other options to prevent duplicate data insertion.
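Since BigQuery rejects a job whose ID already exists in the project, a deterministic job ID makes re-submitted loads idempotent. A minimal Ruby sketch of the idea (the fingerprint inputs and names here are illustrative, not the plugin's exact ones):

```ruby
require 'digest/md5'

# Hypothetical sketch: derive a deterministic load-job ID from the file
# content and the load options, so re-submitting the same load is rejected
# by BigQuery as a duplicate job instead of inserting the data twice.
def dedup_job_id(file_path, options)
  fingerprint = Digest::MD5.hexdigest(Digest::MD5.file(file_path).hexdigest + options.inspect)
  "embulk_load_job_#{fingerprint}"
end
```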
@@ -238,8 +333,39 @@ out:
  prevent_duplicate_insert: true
  ```

- ## Build
+ ## Development
+
+ ### Run example:
+
+ Prepare a json\_keyfile at /tmp/your-project-000.json, then

  ```
- $ ./gradlew gem
+ $ embulk bundle install --path vendor/bundle
+ $ embulk run -X page_size=1 -b . -l trace example/example.yml
+ ```
+
+ ### Run test:
+
  ```
+ $ bundle exec rake test
+ ```
+
+ To run tests which actually connect to BigQuery such as test/test\_bigquery\_client.rb,
+ prepare a json\_keyfile at /tmp/your-project-000.json, then
+
+ ```
+ $ CONNECT=1 bundle exec ruby test/test_bigquery_client.rb
+ $ CONNECT=1 bundle exec ruby test/test_example.rb
+ ```
+
+ ### Release gem:
+
+ Fix gemspec, then
+
+ ```
+ $ bundle exec rake release
+ ```
+
+ ## ChangeLog
+
+ [CHANGELOG.md](CHANGELOG.md)
data/Rakefile ADDED
@@ -0,0 +1,11 @@
+ require "bundler/gem_tasks"
+ require 'rake/testtask'
+
+ desc 'Run test_unit based test'
+ Rake::TestTask.new(:test) do |t|
+   t.libs << "test"
+   t.test_files = Dir["test/**/test_*.rb"].sort
+   t.verbose = true
+   #t.warning = true
+ end
+ task :default => :test
data/embulk-output-bigquery.gemspec ADDED
@@ -0,0 +1,20 @@
+ Gem::Specification.new do |spec|
+   spec.name = "embulk-output-bigquery"
+   spec.version = "0.3.0.pre1"
+   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
+   spec.summary = "Google BigQuery output plugin for Embulk"
+   spec.description = "Embulk plugin that inserts records into Google BigQuery."
+   spec.email = ["satoshiakama@gmail.com", "sonots@gmail.com"]
+   spec.licenses = ["MIT"]
+   spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
+
+   spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
+   spec.test_files = spec.files.grep(%r{^(test|spec)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_dependency 'google-api-client'
+   spec.add_dependency "tzinfo"
+   spec.add_development_dependency 'embulk', ['>= 0.8.2']
+   spec.add_development_dependency 'bundler', ['>= 1.10.6']
+   spec.add_development_dependency 'rake', ['>= 10.0']
+ end
data/example/config_client_options.yml ADDED
@@ -0,0 +1,33 @@
+ in:
+   type: file
+   path_prefix: example/example.csv
+   parser:
+     type: csv
+     charset: UTF-8
+     newline: CRLF
+     null_string: 'NULL'
+     skip_header_lines: 1
+     comment_line_marker: '#'
+     columns:
+       - {name: date, type: string}
+       - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+       - {name: "null", type: string}
+       - {name: long, type: long}
+       - {name: string, type: string}
+       - {name: double, type: double}
+       - {name: boolean, type: boolean}
+ out:
+   type: bigquery
+   mode: replace
+   auth_method: json_key
+   json_keyfile: /tmp/your-project-000.json
+   dataset: your_dataset_name
+   table: your_table_name
+   source_format: NEWLINE_DELIMITED_JSON
+   auto_create_dataset: true
+   auto_create_table: true
+   schema_file: example/schema.json
+   timeout_sec: 400
+   open_timeout_sec: 400
+   retries: 2
+   application_name: "Embulk BigQuery plugin test"
data/example/config_csv.yml ADDED
@@ -0,0 +1,30 @@
+ in:
+   type: file
+   path_prefix: example/example.csv
+   parser:
+     type: csv
+     charset: UTF-8
+     newline: CRLF
+     null_string: 'NULL'
+     skip_header_lines: 1
+     comment_line_marker: '#'
+     columns:
+       - {name: date, type: string}
+       - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+       - {name: "null", type: string}
+       - {name: long, type: long}
+       - {name: string, type: string}
+       - {name: double, type: double}
+       - {name: boolean, type: boolean}
+ out:
+   type: bigquery
+   mode: replace
+   auth_method: json_key
+   json_keyfile: /tmp/your-project-000.json
+   dataset: your_dataset_name
+   table: your_table_name
+   source_format: CSV
+   compression: GZIP
+   auto_create_dataset: true
+   auto_create_table: true
+   schema_file: example/schema.json