embulk-output-bigquery 0.2.3 → 0.3.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -12
- data/CHANGELOG.md +18 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +20 -0
- data/README.md +165 -39
- data/Rakefile +11 -0
- data/embulk-output-bigquery.gemspec +20 -0
- data/example/config_client_options.yml +33 -0
- data/example/config_csv.yml +30 -0
- data/example/config_delete_in_advance.yml +29 -0
- data/example/config_expose_errors.yml +30 -0
- data/example/config_guess_from_embulk_schema.yml +29 -0
- data/example/config_guess_with_column_options.yml +40 -0
- data/example/config_gzip.yml +30 -0
- data/example/config_jsonl.yml +30 -0
- data/example/config_mode_append.yml +30 -0
- data/example/config_mode_append_direct.yml +30 -0
- data/example/config_payload_column.yml +20 -0
- data/example/config_payload_column_index.yml +20 -0
- data/example/config_prevent_duplicate_insert.yml +30 -0
- data/example/config_replace.yml +30 -0
- data/example/config_replace_backup.yml +32 -0
- data/example/config_skip_file_generation.yml +32 -0
- data/example/config_table_strftime.yml +30 -0
- data/example/config_template_table.yml +21 -0
- data/example/config_uncompressed.yml +30 -0
- data/example/config_with_rehearsal.yml +32 -0
- data/example/example.csv +17 -0
- data/example/example.jsonl +16 -0
- data/example/example.yml +30 -0
- data/example/json_key.json +12 -0
- data/example/nested_example.jsonl +16 -0
- data/example/schema.json +30 -0
- data/example/schema_expose_errors.json +30 -0
- data/lib/embulk/output/bigquery.rb +388 -3
- data/lib/embulk/output/bigquery/bigquery_client.rb +396 -0
- data/lib/embulk/output/bigquery/file_writer.rb +103 -0
- data/lib/embulk/output/bigquery/helper.rb +78 -0
- data/lib/embulk/output/bigquery/value_converter_factory.rb +292 -0
- data/test/helper.rb +13 -0
- data/test/test_bigquery_client.rb +166 -0
- data/test/test_configure.rb +254 -0
- data/test/test_example.rb +34 -0
- data/test/test_file_writer.rb +129 -0
- data/test/test_helper.rb +103 -0
- data/test/test_transaction.rb +129 -0
- data/test/test_value_converter_factory.rb +316 -0
- metadata +114 -45
- data/build.gradle +0 -80
- data/config/checkstyle/checkstyle.xml +0 -128
- data/config/checkstyle/default.xml +0 -108
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +0 -6
- data/gradlew +0 -164
- data/gradlew.bat +0 -90
- data/settings.gradle +0 -2
- data/src/main/java/org/embulk/output/BigqueryAuthentication.java +0 -117
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +0 -508
- data/src/main/java/org/embulk/output/BigqueryWriter.java +0 -575
- data/src/test/java/org/embulk/output/TestBigqueryAuthentication.java +0 -5
- data/src/test/java/org/embulk/output/TestBigqueryOutputPlugin.java +0 -5
- data/src/test/java/org/embulk/output/TestBigqueryWriter.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0907e6f02a9b05ea6a75e18d457cb641eede5973
+  data.tar.gz: f8119467b434636fc6f6696c36f2075eeb82e795
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4b7313ca411cc3bb1fd064554ac706219e00fb358a445e2978e918c417aacc96ee972d3d49d30e9c1ebdb286066af4cf18305846fdf4dab06a1ced91249af2dc
+  data.tar.gz: 2f9e84d736de70a35369b4a003d8c1848a1433da902843d57b89a8b5ff5eb64bb945fceb828af2f565b87c271b9537dc433d53a6a1024c4ea014f74842abc0e1
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,21 @@
+## 0.3.0 - YYYY-MM-DD
+
+A big change is introduced: embulk-output-bigquery is now written in JRuby.
+
+* [new feature] Support parallel loads. Fix [#28](https://github.com/embulk/embulk-output-bigquery/issues/28).
+* [new feature] Create table first. Fix [#29](https://github.com/embulk/embulk-output-bigquery/issues/29).
+* [new feature] Introduce rehearsal mode. Fix [#30](https://github.com/embulk/embulk-output-bigquery/issues/30).
+* [new feature] Support `dataset_old` option for `replace_backup`. Fix [#31](https://github.com/embulk/embulk-output-bigquery/issues/31).
+* [maintenance] Fix default timestamp format to `%Y-%m-%d %H:%M:%S.%6N`. Fix [#32](https://github.com/embulk/embulk-output-bigquery/issues/32).
+* [new feature] Support request options such as `timeout_sec`, `open_timeout_sec`, `retries`. Fix [#33](https://github.com/embulk/embulk-output-bigquery/issues/33).
+* [new feature] Support continuing from file generation with the `skip_file_generation` option.
+* [new feature] Guess BigQuery schema from Embulk schema. Fix [#1](https://github.com/embulk/embulk-output-bigquery/issues/1).
+* [new feature] Support automatically creating a dataset.
+* [new feature] Support transactional append mode.
+* [incompatibility change] Formatter plugin support is dropped. Formatting is done in this plugin for the specified `source_format`.
+* [incompatibility change] Encoder plugin support is dropped. Encoding is done in this plugin for the specified `compression`.
+* [incompatibility change] `append` mode now expresses a transactional append, and `append_direct` is the non-transactional variant (this was `append` mode before).
+
 ## 0.2.3 - 2016-02-19
 
 * [maintenance] Fix detect logic of delete_in_advance mode. [#26](https://github.com/embulk/embulk-output-bigquery/issues/26). @sonots thanks!
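To make the mode rename concrete, a minimal sketch of the two configurations (other required keys such as auth and dataset are omitted):

```yaml
out:
  type: bigquery
  mode: append          # 0.3.0: transactional; loads into a temporary table, then copies (WRITE_APPEND)
  # mode: append_direct # pre-0.3.0 "append": direct insert, not transactional
```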
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
CHANGED
@@ -1,4 +1,3 @@
-
 # embulk-output-bigquery
 
 [Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/) using [direct insert](https://cloud.google.com/bigquery/loading-data-into-bigquery#loaddatapostrequest)
@@ -26,27 +25,53 @@ OAuth flow for installed applications.
 
 #### Original options
 
-| name
-| mode
-| auth_method
-| service_account_email
-| p12_keyfile
-| json_keyfile
+| name                        | type    | required?                                | default                  | description |
+|:----------------------------|:--------|:-----------------------------------------|:-------------------------|:------------|
+| mode                        | string  | optional                                 | "append"                 | [See below](#mode) |
+| auth_method                 | string  | optional                                 | "private_key"            | `private_key`, `json_key`, or `compute_engine` |
+| service_account_email       | string  | required when auth_method is private_key |                          | Your Google service account email |
+| p12_keyfile                 | string  | required when auth_method is private_key |                          | Full path of private key in P12 (PKCS12) format |
+| json_keyfile                | string  | required when auth_method is json_key    |                          | Full path of JSON key |
+| project                     | string  | required if json_keyfile is not given    |                          | project_id |
+| dataset                     | string  | required                                 |                          | dataset |
+| table                       | string  | required                                 |                          | table name |
+| auto_create_dataset         | boolean | optional                                 | false                    | automatically create dataset |
+| auto_create_table           | boolean | optional                                 | false                    | [See below](#dynamic-table-creating) |
+| schema_file                 | string  | optional                                 |                          | /path/to/schema.json |
+| template_table              | string  | optional                                 |                          | template table name [See below](#dynamic-table-creating) |
+| prevent_duplicate_insert    | boolean | optional                                 | false                    | [See below](#data-consistency) |
+| job_status_max_polling_time | int     | optional                                 | 3600 sec                 | Max job status polling time |
+| job_status_polling_interval | int     | optional                                 | 10 sec                   | Job status polling interval |
+| is_skip_job_result_check    | boolean | optional                                 | false                    | Skip waiting until the load job finishes. Available for append or delete_in_advance mode |
+| with_rehearsal              | boolean | optional                                 | false                    | Load `rehearsal_counts` records as a rehearsal. A rehearsal loads into a REHEARSAL temporary table, which is deleted at the end. Use this option to catch data errors at as early a stage as possible |
+| rehearsal_counts            | integer | optional                                 | 1000                     | Number of records to load in a rehearsal |
+| column_options              | hash    | optional                                 |                          | [See below](#column-options) |
+| default_timezone            | string  | optional                                 | UTC                      | |
+| default_timestamp_format    | string  | optional                                 | %Y-%m-%d %H:%M:%S.%6N    | |
+| payload_column              | string  | optional                                 | nil                      | [See below](#formatter-performance-issue) |
+| payload_column_index        | integer | optional                                 | nil                      | [See below](#formatter-performance-issue) |
+
+Client or request options
+
+| name             | type    | required? | default                  | description |
+|:-----------------|:--------|:----------|:-------------------------|:------------|
+| timeout_sec      | integer | optional  | 300                      | Seconds to wait for one block to be read |
+| open_timeout_sec | integer | optional  | 300                      | Seconds to wait for the connection to open |
+| retries          | integer | optional  | 5                        | Number of retries |
+| application_name | string  | optional  | "Embulk BigQuery plugin" | User-Agent |
+
+Options for intermediate local files
+
+| name                           | type    | required? | default  | description |
+|:-------------------------------|:--------|:----------|:---------|:------------|
+| path_prefix                    | string  | optional  |          | Path prefix of local files such as "/tmp/prefix_". By default, generated randomly with [tempfile](http://ruby-doc.org/stdlib-2.2.3/libdoc/tempfile/rdoc/Tempfile.html) |
+| sequence_format                | string  | optional  | .%d.%03d | Sequence format for pid and task index |
+| file_ext                       | string  | optional  |          | File extension of local files such as ".csv.gz" or ".json.gz". By default, derived from `source_format` and `compression` |
+| skip_file_generation           | boolean | optional  |          | Load already-generated local files into BigQuery if available. Specify the correct path_prefix and file_ext |
+| delete_from_local_when_job_end | boolean | optional  | false    | If set to true, delete local files when the job ends |
+| compression                    | string  | optional  | "NONE"   | Compression of local files (`GZIP` or `NONE`) |
+
+`source_format` is also used to determine the formatter (csv or jsonl).
 
 #### Same options of bq command-line tools or BigQuery job's property
 
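For orientation, a minimal sketch of how the intermediate-file options above combine (the values here are illustrative placeholders, not defaults):

```yaml
out:
  type: bigquery
  path_prefix: /tmp/prefix_   # files become e.g. /tmp/prefix_.12345.007.csv.gz
  sequence_format: .%d.%03d   # filled with pid and task index
  file_ext: .csv.gz           # by default derived from source_format and compression
```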
@@ -54,7 +79,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 
 | name | type | required? | default | description |
 |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
-| source_format | string | required | "CSV"
+| source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
 | max_bad_records | int | optional | 0 | |
 | field_delimiter | char | optional | "," | |
 | encoding | string | optional | "UTF-8" | `UTF-8` or `ISO-8859-1` |
@@ -70,26 +95,26 @@ out:
   auth_method: private_key # default
   service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
   p12_keyfile: /path/to/p12_keyfile.p12
-  path_prefix: /path/to/output
-  file_ext: csv.gz
-  source_format: CSV
   project: your-project-000
   dataset: your_dataset_name
   table: your_table_name
-
-
-  header_line: false
-  encoders:
-  - {type: gzip}
+  compression: GZIP
+  source_format: NEWLINE_DELIMITED_JSON
 ```
 
 ### mode
 
-
+5 modes are provided.
 
 ##### append
 
-
+1. Load to temporary table.
+2. Copy temporary table to destination table. (WRITE_APPEND)
+
+##### append_direct
+
+Insert data into the existing table directly.
+This is not transactional, i.e., if it fails, the target table could have some rows inserted.
 
 ##### replace
 
@@ -101,7 +126,7 @@ default. When append mode, plugin will insert data into existing table.
 ##### replace_backup
 
 1. Load to temporary table.
-2. Copy destination table to backup table. (
+2. Copy destination table to backup table. (dataset_old, table_old)
 3. Copy temporary table to destination table. (WRITE_TRUNCATE)
 
 ```is_skip_job_result_check``` must be false when replace_backup mode.
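A minimal config sketch for this mode (names are placeholders; `dataset_old` and `table_old` designate the backup target from step 2 above):

```yaml
out:
  type: bigquery
  mode: replace_backup
  dataset: your_dataset_name
  table: your_table_name
  dataset_old: your_dataset_name_old
  table_old: your_table_name_old
```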
@@ -111,8 +136,6 @@ default. When append mode, plugin will insert data into existing table.
 1. Delete destination table, if it exists.
 2. Load to destination table.
 
-```auto_create_table``` must be true when delete_in_advance mode.
-
 ### Authentication
 
 There are three methods supported to fetch access token for the service account.
@@ -196,7 +219,7 @@ When `auto_create_table` is set to true, try to create the table using BigQuery
 
 If table already exists, insert into it.
 
-There are
+There are 3 ways to set schema.
 
 #### Set schema.json
 
@@ -222,6 +245,78 @@
   template_table: existing_table_name
 ```
 
+#### Guess from Embulk Schema
+
+Plugin will try to guess BigQuery schema from Embulk schema. It is also configurable with `column_options`. See [Column Options](#column-options).
+
+### Column Options
+
+Column options are used to aid guessing BigQuery schema, or to define conversion of values:
+
+- **column_options**: advanced: an array of options for columns
+  - **name**: column name
+  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, and `RECORD`. See below for supported conversion types.
+    - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
+    - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
+    - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
+    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `RECORD` (default: `STRING`)
+    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `TIMESTAMP`)
+    - json: `STRING`, `RECORD` (default: `STRING`)
+  - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
+  - **fields**: Describes the nested schema fields if the type property is set to RECORD. Note that this is **required** for a `RECORD` column.
+  - **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
+  - **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`)
+- **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
+- **default_timezone**: default timezone for column_options (string, default is "UTC")
+
+Example:
+
+```yaml
+out:
+  type: bigquery
+  auto_create_table: true
+  column_options:
+    - {name: date, type: STRING, timestamp_format: "%Y-%m-%d", timezone: "Asia/Tokyo"}
+    - name: json_column
+      type: RECORD
+      fields:
+        - {name: key1, type: STRING}
+        - {name: key2, type: STRING}
+```
+
+NOTE: Type conversion is done in this JRuby plugin and could be slow. See [Formatter Performance Issue](#formatter-performance-issue) to improve the performance.
+
+### Formatter Performance Issue
+
+embulk-output-bigquery supports formatting records into CSV or JSON (and also formatting timestamp columns).
+However, this plugin is written in JRuby, and JRuby plugins are generally slower than Java plugins.
+
+Therefore, it is recommended to format records with filter plugins written in Java, such as [embulk-filter-to_json](https://github.com/civitaspo/embulk-filter-to_json), as:
+
+```
+filters:
+  - type: to_json
+    column: {name: payload, type: string}
+    default_format: "%Y-%m-%d %H:%M:%S.%6N"
+out:
+  type: bigquery
+  payload_column_index: 0 # or, payload_column: payload
+```
+
+Furthermore, if your files are originally jsonl or csv files, you can even skip a parser with [embulk-parser-none](https://github.com/sonots/embulk-parser-none) as:
+
+```
+in:
+  type: file
+  path_prefix: example/example.jsonl
+  parser:
+    type: none
+    column_name: payload
+out:
+  type: bigquery
+  payload_column_index: 0 # or, payload_column: payload
+```
+
 ### Data Consistency
 
 When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generates a job ID from the md5 hash of the file and other options to prevent duplicate data insertion.
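To illustrate the deduplication idea, a hypothetical Ruby sketch (not the plugin's actual code or job-ID scheme):

```ruby
require 'digest/md5'

# Hashing the file content together with the load options yields a
# deterministic job ID, so a retried load of identical input produces the
# same ID and BigQuery rejects the duplicate job.
def deterministic_job_id(path, options)
  md5 = Digest::MD5.hexdigest(File.read(path) + options.to_s)
  "embulk_load_job_#{md5}" # prefix is illustrative
end
```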
@@ -238,8 +333,39 @@ out:
   prevent_duplicate_insert: true
 ```
 
-##
+## Development
+
+### Run example:
+
+Prepare a json_keyfile at /tmp/your-project-000.json, then
 
 ```
-$
+$ embulk bundle install --path vendor/bundle
+$ embulk run -X page_size=1 -b . -l trace example/example.yml
+```
+
+### Run test:
+
 ```
+$ bundle exec rake test
+```
+
+To run tests which actually connect to BigQuery, such as test/test_bigquery_client.rb,
+prepare a json_keyfile at /tmp/your-project-000.json, then
+
+```
+$ CONNECT=1 bundle exec ruby test/test_bigquery_client.rb
+$ CONNECT=1 bundle exec ruby test/test_example.rb
+```
+
+### Release gem:
+
+Fix gemspec, then
+
+```
+$ bundle exec rake release
+```
+
+## ChangeLog
+
+[CHANGELOG.md](CHANGELOG.md)
data/Rakefile
ADDED
@@ -0,0 +1,11 @@
+require "bundler/gem_tasks"
+require 'rake/testtask'
+
+desc 'Run test_unit based test'
+Rake::TestTask.new(:test) do |t|
+  t.libs << "test"
+  t.test_files = Dir["test/**/test_*.rb"].sort
+  t.verbose = true
+  #t.warning = true
+end
+task :default => :test
data/embulk-output-bigquery.gemspec
ADDED
@@ -0,0 +1,20 @@
+Gem::Specification.new do |spec|
+  spec.name          = "embulk-output-bigquery"
+  spec.version       = "0.3.0.pre1"
+  spec.authors       = ["Satoshi Akama", "Naotoshi Seo"]
+  spec.summary       = "Google BigQuery output plugin for Embulk"
+  spec.description   = "Embulk plugin that inserts records into Google BigQuery."
+  spec.email         = ["satoshiakama@gmail.com", "sonots@gmail.com"]
+  spec.licenses      = ["MIT"]
+  spec.homepage      = "https://github.com/embulk/embulk-output-bigquery"
+
+  spec.files         = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
+  spec.test_files    = spec.files.grep(%r{^(test|spec)/})
+  spec.require_paths = ["lib"]
+
+  spec.add_dependency 'google-api-client'
+  spec.add_dependency "tzinfo"
+  spec.add_development_dependency 'embulk', ['>= 0.8.2']
+  spec.add_development_dependency 'bundler', ['>= 1.10.6']
+  spec.add_development_dependency 'rake', ['>= 10.0']
+end
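To try this prerelease, Embulk's bundled gem command should work along these lines (the version flag passes through to standard `gem install` semantics):

```
$ embulk gem install embulk-output-bigquery -v 0.3.0.pre1
```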
data/example/config_client_options.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: /tmp/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  timeout_sec: 400
+  open_timeout_sec: 400
+  retries: 2
+  application_name: "Embulk BigQuery plugin test"
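Assuming a valid json_keyfile at /tmp/your-project-000.json (as in the Development section of the README), this example can be run with:

```
$ embulk run example/config_client_options.yml
```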
data/example/config_csv.yml
ADDED
@@ -0,0 +1,30 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: /tmp/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: CSV
+  compression: GZIP
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
|