embulk-output-bigquery 0.2.3 → 0.3.0.pre1
- checksums.yaml +4 -4
- data/.gitignore +6 -12
- data/CHANGELOG.md +18 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +20 -0
- data/README.md +165 -39
- data/Rakefile +11 -0
- data/embulk-output-bigquery.gemspec +20 -0
- data/example/config_client_options.yml +33 -0
- data/example/config_csv.yml +30 -0
- data/example/config_delete_in_advance.yml +29 -0
- data/example/config_expose_errors.yml +30 -0
- data/example/config_guess_from_embulk_schema.yml +29 -0
- data/example/config_guess_with_column_options.yml +40 -0
- data/example/config_gzip.yml +30 -0
- data/example/config_jsonl.yml +30 -0
- data/example/config_mode_append.yml +30 -0
- data/example/config_mode_append_direct.yml +30 -0
- data/example/config_payload_column.yml +20 -0
- data/example/config_payload_column_index.yml +20 -0
- data/example/config_prevent_duplicate_insert.yml +30 -0
- data/example/config_replace.yml +30 -0
- data/example/config_replace_backup.yml +32 -0
- data/example/config_skip_file_generation.yml +32 -0
- data/example/config_table_strftime.yml +30 -0
- data/example/config_template_table.yml +21 -0
- data/example/config_uncompressed.yml +30 -0
- data/example/config_with_rehearsal.yml +32 -0
- data/example/example.csv +17 -0
- data/example/example.jsonl +16 -0
- data/example/example.yml +30 -0
- data/example/json_key.json +12 -0
- data/example/nested_example.jsonl +16 -0
- data/example/schema.json +30 -0
- data/example/schema_expose_errors.json +30 -0
- data/lib/embulk/output/bigquery.rb +388 -3
- data/lib/embulk/output/bigquery/bigquery_client.rb +396 -0
- data/lib/embulk/output/bigquery/file_writer.rb +103 -0
- data/lib/embulk/output/bigquery/helper.rb +78 -0
- data/lib/embulk/output/bigquery/value_converter_factory.rb +292 -0
- data/test/helper.rb +13 -0
- data/test/test_bigquery_client.rb +166 -0
- data/test/test_configure.rb +254 -0
- data/test/test_example.rb +34 -0
- data/test/test_file_writer.rb +129 -0
- data/test/test_helper.rb +103 -0
- data/test/test_transaction.rb +129 -0
- data/test/test_value_converter_factory.rb +316 -0
- metadata +114 -45
- data/build.gradle +0 -80
- data/config/checkstyle/checkstyle.xml +0 -128
- data/config/checkstyle/default.xml +0 -108
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +0 -6
- data/gradlew +0 -164
- data/gradlew.bat +0 -90
- data/settings.gradle +0 -2
- data/src/main/java/org/embulk/output/BigqueryAuthentication.java +0 -117
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +0 -508
- data/src/main/java/org/embulk/output/BigqueryWriter.java +0 -575
- data/src/test/java/org/embulk/output/TestBigqueryAuthentication.java +0 -5
- data/src/test/java/org/embulk/output/TestBigqueryOutputPlugin.java +0 -5
- data/src/test/java/org/embulk/output/TestBigqueryWriter.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0907e6f02a9b05ea6a75e18d457cb641eede5973
+  data.tar.gz: f8119467b434636fc6f6696c36f2075eeb82e795
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4b7313ca411cc3bb1fd064554ac706219e00fb358a445e2978e918c417aacc96ee972d3d49d30e9c1ebdb286066af4cf18305846fdf4dab06a1ced91249af2dc
+  data.tar.gz: 2f9e84d736de70a35369b4a003d8c1848a1433da902843d57b89a8b5ff5eb64bb945fceb828af2f565b87c271b9537dc433d53a6a1024c4ea014f74842abc0e1
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,21 @@
+## 0.3.0 - YYYY-MM-DD
+
+A big change is introduced: embulk-output-bigquery is now written in JRuby.
+
+* [new feature] Support parallel loads. Fix [#28](https://github.com/embulk/embulk-output-bigquery/issues/28).
+* [new feature] Create table first. Fix [#29](https://github.com/embulk/embulk-output-bigquery/issues/29).
+* [new feature] Introduce rehearsal mode. Fix [#30](https://github.com/embulk/embulk-output-bigquery/issues/30).
+* [new feature] Support `dataset_old` option for `replace_backup`. Fix [#31](https://github.com/embulk/embulk-output-bigquery/issues/31).
+* [maintenance] Fix default timestamp format to `%Y-%m-%d %H:%M:%S.%6N`. Fix [#32](https://github.com/embulk/embulk-output-bigquery/issues/32).
+* [new feature] Support request options such as `timeout_sec`, `open_timeout_sec`, `retries`. Fix [#33](https://github.com/embulk/embulk-output-bigquery/issues/33).
+* [new feature] Support continuing from file generation with the `skip_file_generation` option.
+* [new feature] Guess BigQuery schema from Embulk schema. Fix [#1](https://github.com/embulk/embulk-output-bigquery/issues/1).
+* [new feature] Support automatically creating the dataset.
+* [new feature] Support a transactional append mode.
+* [incompatibility change] Formatter plugin support is dropped. Formatting is done in this plugin according to the specified `source_format`.
+* [incompatibility change] Encoder plugin support is dropped. Encoding is done in this plugin according to the specified `compression`.
+* [incompatibility change] `append` mode now expresses a transactional append, and `append_direct` is the non-transactional one (this was the `append` mode before).
+
 ## 0.2.3 - 2016-02-19
 
 * [maintenance] Fix detect logic of delete_in_advance mode. [#26](https://github.com/embulk/embulk-output-bigquery/issues/26). @sonots thanks!
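Several of these 0.3.0 entries correspond directly to new configuration keys. As a rough illustration of how they combine (values here are placeholders; the option tables in the README diff below are authoritative):

```yaml
out:
  type: bigquery
  mode: append               # 0.3.0: transactional append via a temporary table
  auto_create_dataset: true  # 0.3.0: create the dataset automatically if missing
  with_rehearsal: true       # 0.3.0: load rehearsal_counts records first to catch data errors early
  rehearsal_counts: 1000
  timeout_sec: 300           # 0.3.0 request options
  open_timeout_sec: 300
  retries: 5
```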
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
CHANGED
@@ -1,4 +1,3 @@
-
 # embulk-output-bigquery
 
 [Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/) using [direct insert](https://cloud.google.com/bigquery/loading-data-into-bigquery#loaddatapostrequest)
@@ -26,27 +25,53 @@ OAuth flow for installed applications.
 
 #### Original options
 
-| name
-| mode
-| auth_method
-| service_account_email
-| p12_keyfile
-| json_keyfile
+| name                        | type    | required?                                | default                  | description |
+|:----------------------------|:--------|:-----------------------------------------|:-------------------------|:------------|
+| mode                        | string  | optional                                 | "append"                 | [See below](#mode) |
+| auth_method                 | string  | optional                                 | "private_key"            | `private_key`, `json_key` or `compute_engine` |
+| service_account_email       | string  | required when auth_method is private_key |                          | Your Google service account email |
+| p12_keyfile                 | string  | required when auth_method is private_key |                          | Full path of the private key in P12 (PKCS12) format |
+| json_keyfile                | string  | required when auth_method is json_key    |                          | Full path of the JSON key |
+| project                     | string  | required if json_keyfile is not given    |                          | project_id |
+| dataset                     | string  | required                                 |                          | dataset |
+| table                       | string  | required                                 |                          | table name |
+| auto_create_dataset         | boolean | optional                                 | false                    | automatically create the dataset |
+| auto_create_table           | boolean | optional                                 | false                    | [See below](#dynamic-table-creating) |
+| schema_file                 | string  | optional                                 |                          | /path/to/schema.json |
+| template_table              | string  | optional                                 |                          | template table name [See below](#dynamic-table-creating) |
+| prevent_duplicate_insert    | boolean | optional                                 | false                    | [See below](#data-consistency) |
+| job_status_max_polling_time | int     | optional                                 | 3600 sec                 | Max job status polling time |
+| job_status_polling_interval | int     | optional                                 | 10 sec                   | Job status polling interval |
+| is_skip_job_result_check    | boolean | optional                                 | false                    | Skip waiting until the load job finishes. Available for append or delete_in_advance mode |
+| with_rehearsal              | boolean | optional                                 | false                    | Load `rehearsal_counts` records as a rehearsal. The rehearsal loads into a REHEARSAL temporary table, which is deleted at the end. Use this option to catch data errors at as early a stage as possible |
+| rehearsal_counts            | integer | optional                                 | 1000                     | Number of records to load in a rehearsal |
+| column_options              | hash    | optional                                 |                          | [See below](#column-options) |
+| default_timezone            | string  | optional                                 | UTC                      | |
+| default_timestamp_format    | string  | optional                                 | %Y-%m-%d %H:%M:%S.%6N    | |
+| payload_column              | string  | optional                                 | nil                      | [See below](#formatter-performance-issue) |
+| payload_column_index        | integer | optional                                 | nil                      | [See below](#formatter-performance-issue) |
+
+Client or request options:
+
+| name             | type    | required? | default                  | description |
+|:-----------------|:--------|:----------|:-------------------------|:------------|
+| timeout_sec      | integer | optional  | 300                      | Seconds to wait for one block to be read |
+| open_timeout_sec | integer | optional  | 300                      | Seconds to wait for the connection to open |
+| retries          | integer | optional  | 5                        | Number of retries |
+| application_name | string  | optional  | "Embulk BigQuery plugin" | User-Agent |
+
+Options for intermediate local files:
+
+| name                           | type    | required? | default  | description |
+|:-------------------------------|:--------|:----------|:---------|:------------|
+| path_prefix                    | string  | optional  |          | Path prefix of local files such as "/tmp/prefix_". Defaults to a name randomly generated with [tempfile](http://ruby-doc.org/stdlib-2.2.3/libdoc/tempfile/rdoc/Tempfile.html) |
+| sequence_format                | string  | optional  | .%d.%03d | Sequence format for pid and task index |
+| file_ext                       | string  | optional  |          | The file extension of local files such as ".csv.gz" or ".json.gz". Defaults to a value derived from `source_format` and `compression` |
+| skip_file_generation           | boolean | optional  |          | Load already generated local files into BigQuery if available. Specify the correct path_prefix and file_ext |
+| delete_from_local_when_job_end | boolean | optional  | false    | If set to true, delete the local files when the job ends |
+| compression                    | string  | optional  | "NONE"   | Compression of local files (`GZIP` or `NONE`) |
+
+`source_format` is also used to determine the formatter (csv or jsonl).
 
 #### Same options of bq command-line tools or BigQuery job's property
 
@@ -54,7 +79,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 
 | name                      | type        | required?  | default      | description             |
 |:--------------------------|:------------|:-----------|:-------------|:------------------------|
-| source_format             | string      | required   | "CSV"
+| source_format             | string      | required   | "CSV"        | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
 | max_bad_records           | int         | optional   | 0            | |
 | field_delimiter           | char        | optional   | ","          | |
 | encoding                  | string      | optional   | "UTF-8"      | `UTF-8` or `ISO-8859-1` |
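The intermediate-file options above determine where records are staged on disk before the load. A minimal sketch (illustrative path; `file_ext` is left to its default, derived from `source_format` and `compression`):

```yaml
out:
  type: bigquery
  path_prefix: /tmp/embulk_bq_           # staging files look like /tmp/embulk_bq_.<pid>.<task index><file_ext>
  source_format: NEWLINE_DELIMITED_JSON
  compression: GZIP
  delete_from_local_when_job_end: true   # clean up the staging files afterwards
```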
@@ -70,26 +95,26 @@ out:
 
   auth_method: private_key # default
   service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
   p12_keyfile: /path/to/p12_keyfile.p12
-  path_prefix: /path/to/output
-  file_ext: csv.gz
-  source_format: CSV
   project: your-project-000
   dataset: your_dataset_name
   table: your_table_name
-
-
-  header_line: false
-  encoders:
-  - {type: gzip}
+  compression: GZIP
+  source_format: NEWLINE_DELIMITED_JSON
 ```
 
 ### mode
 
-
+5 modes are provided.
 
 ##### append
 
-
+1. Load to a temporary table.
+2. Copy the temporary table to the destination table. (WRITE_APPEND)
+
+##### append_direct
+
+Insert data into the existing table directly.
+This is not transactional; i.e., if it fails, the target table could be left with some rows inserted.
 
 ##### replace
 
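The two append variants above differ only in atomicity, so switching between them is a one-key change. A minimal sketch (illustrative names):

```yaml
out:
  type: bigquery
  mode: append          # transactional: staged in a temporary table, then copied (WRITE_APPEND)
  # mode: append_direct # non-transactional: loads straight into the destination table
  dataset: your_dataset_name
  table: your_table_name
```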
@@ -101,7 +126,7 @@ default. When append mode, plugin will insert data into existing table.
 
 ##### replace_backup
 
 1. Load to temporary table.
-2. Copy destination table to backup table. (
+2. Copy destination table to backup table. (dataset_old, table_old)
 3. Copy temporary table to destination table. (WRITE_TRUNCATE)
 
 ```is_skip_job_result_check``` must be false when replace_backup mode.
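Because replace_backup preserves the previous contents, it needs the backup destination spelled out via the `dataset_old` option added in 0.3.0. A minimal sketch (illustrative names):

```yaml
out:
  type: bigquery
  mode: replace_backup
  dataset: your_dataset_name
  table: your_table_name
  dataset_old: your_backup_dataset_name  # step 2 copies the current table here
  table_old: your_table_name_old
  is_skip_job_result_check: false        # must stay false in this mode
```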
@@ -111,8 +136,6 @@ default. When append mode, plugin will insert data into existing table.
 
 1. Delete destination table, if it exists.
 2. Load to destination table.
 
-```auto_create_table``` must be true when delete_in_advance mode.
-
 ### Authentication
 
 There are three methods supported to fetch an access token for the service account.
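Each method corresponds to an `auth_method` value from the options table. A minimal sketch of the three variants (illustrative credentials; the README's own Authentication section has the full details):

```yaml
out:
  type: bigquery
  # 1) private_key (default): service account email plus a P12 key file
  auth_method: private_key
  service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
  p12_keyfile: /path/to/p12_keyfile.p12
  # 2) json_key: a single JSON key file instead
  #auth_method: json_key
  #json_keyfile: /path/to/json_keyfile.json
  # 3) compute_engine: runs on a GCE instance with pre-authorized scopes, no key file
  #auth_method: compute_engine
```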
@@ -196,7 +219,7 @@ When `auto_create_table` is set to true, try to create the table using BigQuery
 
 If table already exists, insert into it.
 
-There are
+There are 3 ways to set the schema.
 
 #### Set schema.json
 
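The first approach points `schema_file` at a BigQuery schema definition in JSON. A minimal sketch (illustrative path; the plugin's own example files such as example/schema.json follow the same shape):

```yaml
out:
  type: bigquery
  auto_create_table: true
  table: your_table_name
  schema_file: /path/to/schema.json  # JSON array of {name, type, ...} field definitions
```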
@@ -222,6 +245,78 @@ out:
   template_table: existing_table_name
 ```
 
+#### Guess from Embulk Schema
+
+The plugin will try to guess the BigQuery schema from the Embulk schema. It is also configurable with `column_options`. See [Column Options](#column-options).
+
+### Column Options
+
+Column options are used to aid guessing the BigQuery schema, or to define conversion of values:
+
+- **column_options**: advanced: an array of options for columns
+  - **name**: column name
+  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, and `RECORD`. See below for supported conversion types.
+    - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
+    - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
+    - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
+    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `RECORD` (default: `STRING`)
+    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `TIMESTAMP`)
+    - json: `STRING`, `RECORD` (default: `STRING`)
+  - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
+  - **fields**: Describes the nested schema fields if the type property is set to RECORD. Note that this is **required** for a `RECORD` column.
+  - **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
+  - **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`)
+- **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
+- **default_timezone**: default timezone for column_options (string, default is "UTC")
+
+Example:
+
+```yaml
+out:
+  type: bigquery
+  auto_create_table: true
+  column_options:
+  - {name: date, type: STRING, timestamp_format: "%Y-%m-%d", timezone: "Asia/Tokyo"}
+  - name: json_column
+    type: RECORD
+    fields:
+    - {name: key1, type: STRING}
+    - {name: key2, type: STRING}
+```
+
+NOTE: Type conversion is done in this JRuby plugin and could be slow. See [Formatter Performance Issue](#formatter-performance-issue) to improve the performance.
+
+### Formatter Performance Issue
+
+embulk-output-bigquery supports formatting records into CSV or JSON (and also formatting the timestamp columns).
+However, this plugin is written in JRuby, and JRuby plugins are generally slower than Java plugins.
+
+Therefore, it is recommended to format records with a filter plugin written in Java such as [embulk-filter-to_json](https://github.com/civitaspo/embulk-filter-to_json) as:
+
+```
+filters:
+  - type: to_json
+    column: {name: payload, type: string}
+    default_format: "%Y-%m-%d %H:%M:%S.%6N"
+out:
+  type: bigquery
+  payload_column_index: 0 # or, payload_column: payload
+```
+
+Furthermore, if your files are originally jsonl or csv files, you can even skip the parser with [embulk-parser-none](https://github.com/sonots/embulk-parser-none) as:
+
+```
+in:
+  type: file
+  path_prefix: example/example.jsonl
+  parser:
+    type: none
+    column_name: payload
+out:
+  type: bigquery
+  payload_column_index: 0 # or, payload_column: payload
+```
+
 ### Data Consistency
 
 When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generates a job ID from the md5 hash of the file and other options to prevent duplicate data insertion.
 
@@ -238,8 +333,39 @@ out:
 
   prevent_duplicate_insert: true
 ```
 
-##
+## Development
+
+### Run example:
+
+Prepare a json\_keyfile at /tmp/your-project-000.json, then
 
 ```
-$
+$ embulk bundle install --path vendor/bundle
+$ embulk run -X page_size=1 -b . -l trace example/example.yml
+```
+
+### Run test:
+
 ```
+$ bundle exec rake test
+```
+
+To run tests which actually connect to BigQuery such as test/test\_bigquery\_client.rb,
+prepare a json\_keyfile at /tmp/your-project-000.json, then
+
+```
+$ CONNECT=1 bundle exec ruby test/test_bigquery_client.rb
+$ CONNECT=1 bundle exec ruby test/test_example.rb
+```
+
+### Release gem:
+
+Fix the gemspec, then
+
+```
+$ bundle exec rake release
+```
+
+## ChangeLog
+
+[CHANGELOG.md](CHANGELOG.md)
data/Rakefile
ADDED
@@ -0,0 +1,11 @@
+require "bundler/gem_tasks"
+require 'rake/testtask'
+
+desc 'Run test_unit based test'
+Rake::TestTask.new(:test) do |t|
+  t.libs << "test"
+  t.test_files = Dir["test/**/test_*.rb"].sort
+  t.verbose = true
+  #t.warning = true
+end
+task :default => :test
data/embulk-output-bigquery.gemspec
ADDED
@@ -0,0 +1,20 @@
+Gem::Specification.new do |spec|
+  spec.name          = "embulk-output-bigquery"
+  spec.version       = "0.3.0.pre1"
+  spec.authors       = ["Satoshi Akama", "Naotoshi Seo"]
+  spec.summary       = "Google BigQuery output plugin for Embulk"
+  spec.description   = "Embulk plugin that inserts records into Google BigQuery."
+  spec.email         = ["satoshiakama@gmail.com", "sonots@gmail.com"]
+  spec.licenses      = ["MIT"]
+  spec.homepage      = "https://github.com/embulk/embulk-output-bigquery"
+
+  spec.files         = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
+  spec.test_files    = spec.files.grep(%r{^(test|spec)/})
+  spec.require_paths = ["lib"]
+
+  spec.add_dependency 'google-api-client'
+  spec.add_dependency "tzinfo"
+  spec.add_development_dependency 'embulk', ['>= 0.8.2']
+  spec.add_development_dependency 'bundler', ['>= 1.10.6']
+  spec.add_development_dependency 'rake', ['>= 10.0']
+end
data/example/config_client_options.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+    - {name: date, type: string}
+    - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+    - {name: "null", type: string}
+    - {name: long, type: long}
+    - {name: string, type: string}
+    - {name: double, type: double}
+    - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: /tmp/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  timeout_sec: 400
+  open_timeout_sec: 400
+  retries: 2
+  application_name: "Embulk BigQuery plugin test"
data/example/config_csv.yml
ADDED
@@ -0,0 +1,30 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+    - {name: date, type: string}
+    - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+    - {name: "null", type: string}
+    - {name: long, type: long}
+    - {name: string, type: string}
+    - {name: double, type: double}
+    - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: /tmp/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: CSV
+  compression: GZIP
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json