embulk-output-bigquery 0.2.3 → 0.3.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -12
- data/CHANGELOG.md +18 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +20 -0
- data/README.md +165 -39
- data/Rakefile +11 -0
- data/embulk-output-bigquery.gemspec +20 -0
- data/example/config_client_options.yml +33 -0
- data/example/config_csv.yml +30 -0
- data/example/config_delete_in_advance.yml +29 -0
- data/example/config_expose_errors.yml +30 -0
- data/example/config_guess_from_embulk_schema.yml +29 -0
- data/example/config_guess_with_column_options.yml +40 -0
- data/example/config_gzip.yml +30 -0
- data/example/config_jsonl.yml +30 -0
- data/example/config_mode_append.yml +30 -0
- data/example/config_mode_append_direct.yml +30 -0
- data/example/config_payload_column.yml +20 -0
- data/example/config_payload_column_index.yml +20 -0
- data/example/config_prevent_duplicate_insert.yml +30 -0
- data/example/config_replace.yml +30 -0
- data/example/config_replace_backup.yml +32 -0
- data/example/config_skip_file_generation.yml +32 -0
- data/example/config_table_strftime.yml +30 -0
- data/example/config_template_table.yml +21 -0
- data/example/config_uncompressed.yml +30 -0
- data/example/config_with_rehearsal.yml +32 -0
- data/example/example.csv +17 -0
- data/example/example.jsonl +16 -0
- data/example/example.yml +30 -0
- data/example/json_key.json +12 -0
- data/example/nested_example.jsonl +16 -0
- data/example/schema.json +30 -0
- data/example/schema_expose_errors.json +30 -0
- data/lib/embulk/output/bigquery.rb +388 -3
- data/lib/embulk/output/bigquery/bigquery_client.rb +396 -0
- data/lib/embulk/output/bigquery/file_writer.rb +103 -0
- data/lib/embulk/output/bigquery/helper.rb +78 -0
- data/lib/embulk/output/bigquery/value_converter_factory.rb +292 -0
- data/test/helper.rb +13 -0
- data/test/test_bigquery_client.rb +166 -0
- data/test/test_configure.rb +254 -0
- data/test/test_example.rb +34 -0
- data/test/test_file_writer.rb +129 -0
- data/test/test_helper.rb +103 -0
- data/test/test_transaction.rb +129 -0
- data/test/test_value_converter_factory.rb +316 -0
- metadata +114 -45
- data/build.gradle +0 -80
- data/config/checkstyle/checkstyle.xml +0 -128
- data/config/checkstyle/default.xml +0 -108
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +0 -6
- data/gradlew +0 -164
- data/gradlew.bat +0 -90
- data/settings.gradle +0 -2
- data/src/main/java/org/embulk/output/BigqueryAuthentication.java +0 -117
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +0 -508
- data/src/main/java/org/embulk/output/BigqueryWriter.java +0 -575
- data/src/test/java/org/embulk/output/TestBigqueryAuthentication.java +0 -5
- data/src/test/java/org/embulk/output/TestBigqueryOutputPlugin.java +0 -5
- data/src/test/java/org/embulk/output/TestBigqueryWriter.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0907e6f02a9b05ea6a75e18d457cb641eede5973
+  data.tar.gz: f8119467b434636fc6f6696c36f2075eeb82e795
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4b7313ca411cc3bb1fd064554ac706219e00fb358a445e2978e918c417aacc96ee972d3d49d30e9c1ebdb286066af4cf18305846fdf4dab06a1ced91249af2dc
+  data.tar.gz: 2f9e84d736de70a35369b4a003d8c1848a1433da902843d57b89a8b5ff5eb64bb945fceb828af2f565b87c271b9537dc433d53a6a1024c4ea014f74842abc0e1
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,21 @@
+## 0.3.0 - YYYY-MM-DD
+
+A big change is introduced: embulk-output-bigquery is now written in JRuby.
+
+* [new feature] Support parallel loads. Fix [#28](https://github.com/embulk/embulk-output-bigquery/issues/28).
+* [new feature] Create table first. Fix [#29](https://github.com/embulk/embulk-output-bigquery/issues/29).
+* [new feature] Introduce rehearsal mode. Fix [#30](https://github.com/embulk/embulk-output-bigquery/issues/30).
+* [new feature] Support `dataset_old` option for `replace_backup`. Fix [#31](https://github.com/embulk/embulk-output-bigquery/issues/31).
+* [maintenance] Fix default timestamp format to `%Y-%m-%d %H:%M:%S.%6N`. Fix [#32](https://github.com/embulk/embulk-output-bigquery/issues/32).
+* [new feature] Support request options such as `timeout_sec`, `open_timeout_sec`, `retries`. Fix [#33](https://github.com/embulk/embulk-output-bigquery/issues/33).
+* [new feature] Support continuing from file generation with the `skip_file_generation` option.
+* [new feature] Guess BigQuery schema from Embulk schema. Fix [#1](https://github.com/embulk/embulk-output-bigquery/issues/1).
+* [new feature] Support automatically creating a dataset.
+* [new feature] Support transactional append mode.
+* [incompatibility change] Formatter plugin support is dropped. Formatting is done in this plugin for the specified `source_format`.
+* [incompatibility change] Encoder plugin support is dropped. Encoding is done in this plugin for the specified `compression`.
+* [incompatibility change] `append` mode now expresses a transactional append, and `append_direct` is the non-transactional variant (this was `append` mode before).
+
 ## 0.2.3 - 2016-02-19
 
 * [maintenance] Fix detect logic of delete_in_advance mode. [#26](https://github.com/embulk/embulk-output-bigquery/issues/26). @sonots thanks!
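To make the mode rename concrete, a minimal sketch of the two configurations (other required keys such as auth and dataset are omitted):

```yaml
out:
  type: bigquery
  mode: append          # 0.3.0: transactional; loads into a temporary table, then copies (WRITE_APPEND)
  # mode: append_direct # pre-0.3.0 "append": direct insert, not transactional
```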
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
CHANGED
@@ -1,4 +1,3 @@
-
 # embulk-output-bigquery
 
 [Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/) using [direct insert](https://cloud.google.com/bigquery/loading-data-into-bigquery#loaddatapostrequest)
@@ -26,27 +25,53 @@ OAuth flow for installed applications.
 
 #### Original options
 
-| name
-| mode
-| auth_method
-| service_account_email
-| p12_keyfile
-| json_keyfile
+| name                        | type    | required?                                | default                  | description |
+|:----------------------------|:--------|:-----------------------------------------|:-------------------------|:------------|
+| mode                        | string  | optional                                 | "append"                 | [See below](#mode) |
+| auth_method                 | string  | optional                                 | "private_key"            | `private_key`, `json_key`, or `compute_engine` |
+| service_account_email       | string  | required when auth_method is private_key |                          | Your Google service account email |
+| p12_keyfile                 | string  | required when auth_method is private_key |                          | Full path of private key in P12 (PKCS12) format |
+| json_keyfile                | string  | required when auth_method is json_key    |                          | Full path of JSON key |
+| project                     | string  | required if json_keyfile is not given    |                          | project_id |
+| dataset                     | string  | required                                 |                          | dataset |
+| table                       | string  | required                                 |                          | table name |
+| auto_create_dataset         | boolean | optional                                 | false                    | automatically create dataset |
+| auto_create_table           | boolean | optional                                 | false                    | [See below](#dynamic-table-creating) |
+| schema_file                 | string  | optional                                 |                          | /path/to/schema.json |
+| template_table              | string  | optional                                 |                          | template table name [See below](#dynamic-table-creating) |
+| prevent_duplicate_insert    | boolean | optional                                 | false                    | [See below](#data-consistency) |
+| job_status_max_polling_time | int     | optional                                 | 3600 sec                 | Max job status polling time |
+| job_status_polling_interval | int     | optional                                 | 10 sec                   | Job status polling interval |
+| is_skip_job_result_check    | boolean | optional                                 | false                    | Skip waiting until the load job finishes. Available for append or delete_in_advance mode |
+| with_rehearsal              | boolean | optional                                 | false                    | Load `rehearsal_counts` records as a rehearsal. A rehearsal loads into a REHEARSAL temporary table, which is deleted at the end. Use this option to catch data errors at as early a stage as possible |
+| rehearsal_counts            | integer | optional                                 | 1000                     | Number of records to load in a rehearsal |
+| column_options              | hash    | optional                                 |                          | [See below](#column-options) |
+| default_timezone            | string  | optional                                 | UTC                      | |
+| default_timestamp_format    | string  | optional                                 | %Y-%m-%d %H:%M:%S.%6N    | |
+| payload_column              | string  | optional                                 | nil                      | [See below](#formatter-performance-issue) |
+| payload_column_index        | integer | optional                                 | nil                      | [See below](#formatter-performance-issue) |
+
+Client or request options
+
+| name             | type    | required? | default                  | description |
+|:-----------------|:--------|:----------|:-------------------------|:------------|
+| timeout_sec      | integer | optional  | 300                      | Seconds to wait for one block to be read |
+| open_timeout_sec | integer | optional  | 300                      | Seconds to wait for the connection to open |
+| retries          | integer | optional  | 5                        | Number of retries |
+| application_name | string  | optional  | "Embulk BigQuery plugin" | User-Agent |
+
+Options for intermediate local files
+
+| name                           | type    | required? | default  | description |
+|:-------------------------------|:--------|:----------|:---------|:------------|
+| path_prefix                    | string  | optional  |          | Path prefix of local files such as "/tmp/prefix_". By default, generated randomly with [tempfile](http://ruby-doc.org/stdlib-2.2.3/libdoc/tempfile/rdoc/Tempfile.html) |
+| sequence_format                | string  | optional  | .%d.%03d | Sequence format for pid and task index |
+| file_ext                       | string  | optional  |          | File extension of local files such as ".csv.gz" or ".json.gz". By default, derived from `source_format` and `compression` |
+| skip_file_generation           | boolean | optional  |          | Load already-generated local files into BigQuery if available. Specify the correct path_prefix and file_ext |
+| delete_from_local_when_job_end | boolean | optional  | false    | If set to true, delete local files when the job ends |
+| compression                    | string  | optional  | "NONE"   | Compression of local files (`GZIP` or `NONE`) |
+
+`source_format` is also used to determine the formatter (csv or jsonl).
 
 #### Same options of bq command-line tools or BigQuery job's property
 
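For orientation, a minimal sketch of how the intermediate-file options above combine (the values here are illustrative placeholders, not defaults):

```yaml
out:
  type: bigquery
  path_prefix: /tmp/prefix_   # files become e.g. /tmp/prefix_.12345.007.csv.gz
  sequence_format: .%d.%03d   # filled with pid and task index
  file_ext: .csv.gz           # by default derived from source_format and compression
```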
@@ -54,7 +79,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 
 | name | type | required? | default | description |
 |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
-| source_format | string | required | "CSV"
+| source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
 | max_bad_records | int | optional | 0 | |
 | field_delimiter | char | optional | "," | |
 | encoding | string | optional | "UTF-8" | `UTF-8` or `ISO-8859-1` |
@@ -70,26 +95,26 @@ out:
   auth_method: private_key # default
   service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
   p12_keyfile: /path/to/p12_keyfile.p12
-  path_prefix: /path/to/output
-  file_ext: csv.gz
-  source_format: CSV
   project: your-project-000
   dataset: your_dataset_name
   table: your_table_name
-
-
-  header_line: false
-  encoders:
-  - {type: gzip}
+  compression: GZIP
+  source_format: NEWLINE_DELIMITED_JSON
 ```
 
 ### mode
 
-
+5 modes are provided.
 
 ##### append
 
-
+1. Load to temporary table.
+2. Copy temporary table to destination table. (WRITE_APPEND)
+
+##### append_direct
+
+Insert data into the existing table directly.
+This is not transactional, i.e., if it fails, the target table could have some rows inserted.
 
 ##### replace
 
@@ -101,7 +126,7 @@ default. When append mode, plugin will insert data into existing table.
 ##### replace_backup
 
 1. Load to temporary table.
-2. Copy destination table to backup table. (
+2. Copy destination table to backup table. (dataset_old, table_old)
 3. Copy temporary table to destination table. (WRITE_TRUNCATE)
 
 ```is_skip_job_result_check``` must be false when replace_backup mode.
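A minimal config sketch for this mode (names are placeholders; `dataset_old` and `table_old` designate the backup target from step 2 above):

```yaml
out:
  type: bigquery
  mode: replace_backup
  dataset: your_dataset_name
  table: your_table_name
  dataset_old: your_dataset_name_old
  table_old: your_table_name_old
```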
@@ -111,8 +136,6 @@ default. When append mode, plugin will insert data into existing table.
 1. Delete destination table, if it exists.
 2. Load to destination table.
 
-```auto_create_table``` must be true when delete_in_advance mode.
-
 ### Authentication
 
 There are three methods supported to fetch access token for the service account.
@@ -196,7 +219,7 @@ When `auto_create_table` is set to true, try to create the table using BigQuery
 
 If table already exists, insert into it.
 
-There are
+There are 3 ways to set schema.
 
 #### Set schema.json
 
@@ -222,6 +245,78 @@
   template_table: existing_table_name
 ```
 
+#### Guess from Embulk Schema
+
+Plugin will try to guess BigQuery schema from Embulk schema. It is also configurable with `column_options`. See [Column Options](#column-options).
+
+### Column Options
+
+Column options are used to aid guessing BigQuery schema, or to define conversion of values:
+
+- **column_options**: advanced: an array of options for columns
+  - **name**: column name
+  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, and `RECORD`. See below for supported conversion types.
+    - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
+    - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
+    - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
+    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `RECORD` (default: `STRING`)
+    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `TIMESTAMP`)
+    - json: `STRING`, `RECORD` (default: `STRING`)
+  - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
+  - **fields**: Describes the nested schema fields if the type property is set to RECORD. Note that this is **required** for a `RECORD` column.
+  - **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
+  - **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`)
+- **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
+- **default_timezone**: default timezone for column_options (string, default is "UTC")
+
+Example:
+
+```yaml
+out:
+  type: bigquery
+  auto_create_table: true
+  column_options:
+    - {name: date, type: STRING, timestamp_format: "%Y-%m-%d", timezone: "Asia/Tokyo"}
+    - name: json_column
+      type: RECORD
+      fields:
+        - {name: key1, type: STRING}
+        - {name: key2, type: STRING}
+```
+
+NOTE: Type conversion is done in this JRuby plugin and could be slow. See [Formatter Performance Issue](#formatter-performance-issue) to improve the performance.
+
+### Formatter Performance Issue
+
+embulk-output-bigquery supports formatting records into CSV or JSON (and also formatting timestamp columns).
+However, this plugin is written in JRuby, and JRuby plugins are generally slower than Java plugins.
+
+Therefore, it is recommended to format records with filter plugins written in Java, such as [embulk-filter-to_json](https://github.com/civitaspo/embulk-filter-to_json), as:
+
+```
+filters:
+  - type: to_json
+    column: {name: payload, type: string}
+    default_format: "%Y-%m-%d %H:%M:%S.%6N"
+out:
+  type: bigquery
+  payload_column_index: 0 # or, payload_column: payload
+```
+
+Furthermore, if your files are originally jsonl or csv files, you can even skip a parser with [embulk-parser-none](https://github.com/sonots/embulk-parser-none) as:
+
+```
+in:
+  type: file
+  path_prefix: example/example.jsonl
+  parser:
+    type: none
+    column_name: payload
+out:
+  type: bigquery
+  payload_column_index: 0 # or, payload_column: payload
+```
+
 ### Data Consistency
 
 When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generates a job ID from the md5 hash of the file and other options to prevent duplicate data insertion.
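To illustrate the deduplication idea, a hypothetical Ruby sketch (not the plugin's actual code or job-ID scheme):

```ruby
require 'digest/md5'

# Hashing the file content together with the load options yields a
# deterministic job ID, so a retried load of identical input produces the
# same ID and BigQuery rejects the duplicate job.
def deterministic_job_id(path, options)
  md5 = Digest::MD5.hexdigest(File.read(path) + options.to_s)
  "embulk_load_job_#{md5}" # prefix is illustrative
end
```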
@@ -238,8 +333,39 @@ out:
   prevent_duplicate_insert: true
 ```
 
-##
+## Development
+
+### Run example:
+
+Prepare a json_keyfile at /tmp/your-project-000.json, then
 
 ```
-$
+$ embulk bundle install --path vendor/bundle
+$ embulk run -X page_size=1 -b . -l trace example/example.yml
+```
+
+### Run test:
+
 ```
+$ bundle exec rake test
+```
+
+To run tests which actually connect to BigQuery, such as test/test_bigquery_client.rb,
+prepare a json_keyfile at /tmp/your-project-000.json, then
+
+```
+$ CONNECT=1 bundle exec ruby test/test_bigquery_client.rb
+$ CONNECT=1 bundle exec ruby test/test_example.rb
+```
+
+### Release gem:
+
+Fix gemspec, then
+
+```
+$ bundle exec rake release
+```
+
+## ChangeLog
+
+[CHANGELOG.md](CHANGELOG.md)
data/Rakefile
ADDED
@@ -0,0 +1,11 @@
+require "bundler/gem_tasks"
+require 'rake/testtask'
+
+desc 'Run test_unit based test'
+Rake::TestTask.new(:test) do |t|
+  t.libs << "test"
+  t.test_files = Dir["test/**/test_*.rb"].sort
+  t.verbose = true
+  #t.warning = true
+end
+task :default => :test
data/embulk-output-bigquery.gemspec
ADDED
@@ -0,0 +1,20 @@
+Gem::Specification.new do |spec|
+  spec.name          = "embulk-output-bigquery"
+  spec.version       = "0.3.0.pre1"
+  spec.authors       = ["Satoshi Akama", "Naotoshi Seo"]
+  spec.summary       = "Google BigQuery output plugin for Embulk"
+  spec.description   = "Embulk plugin that inserts records into Google BigQuery."
+  spec.email         = ["satoshiakama@gmail.com", "sonots@gmail.com"]
+  spec.licenses      = ["MIT"]
+  spec.homepage      = "https://github.com/embulk/embulk-output-bigquery"
+
+  spec.files         = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
+  spec.test_files    = spec.files.grep(%r{^(test|spec)/})
+  spec.require_paths = ["lib"]
+
+  spec.add_dependency 'google-api-client'
+  spec.add_dependency "tzinfo"
+  spec.add_development_dependency 'embulk', ['>= 0.8.2']
+  spec.add_development_dependency 'bundler', ['>= 1.10.6']
+  spec.add_development_dependency 'rake', ['>= 10.0']
+end
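To try this prerelease, Embulk's bundled gem command should work along these lines (the version flag passes through to standard `gem install` semantics):

```
$ embulk gem install embulk-output-bigquery -v 0.3.0.pre1
```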
data/example/config_client_options.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: /tmp/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  timeout_sec: 400
+  open_timeout_sec: 400
+  retries: 2
+  application_name: "Embulk BigQuery plugin test"
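Assuming a valid json_keyfile at /tmp/your-project-000.json (as in the Development section of the README), this example can be run with:

```
$ embulk run example/config_client_options.yml
```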
data/example/config_csv.yml
ADDED
@@ -0,0 +1,30 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: /tmp/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: CSV
+  compression: GZIP
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
|