embulk-output-bigquery 0.4.14 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +28 -0
- data/README.md +74 -77
- data/embulk-output-bigquery.gemspec +10 -3
- data/lib/embulk/output/bigquery.rb +19 -49
- data/lib/embulk/output/bigquery/auth.rb +35 -0
- data/lib/embulk/output/bigquery/bigquery_client.rb +2 -11
- data/lib/embulk/output/bigquery/google_client.rb +3 -34
- data/lib/embulk/output/bigquery/value_converter_factory.rb +12 -0
- data/test/test_bigquery_client.rb +1 -5
- data/test/test_configure.rb +10 -19
- data/test/test_example.rb +0 -1
- data/test/test_helper.rb +4 -1
- data/test/test_transaction.rb +22 -62
- data/test/test_value_converter_factory.rb +42 -0
- metadata +29 -52
- data/example/config_append_direct_schema_update_options.yml +0 -31
- data/example/config_client_options.yml +0 -33
- data/example/config_csv.yml +0 -30
- data/example/config_delete_in_advance.yml +0 -29
- data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
- data/example/config_delete_in_advance_partitioned_table.yml +0 -33
- data/example/config_expose_errors.yml +0 -30
- data/example/config_gcs.yml +0 -32
- data/example/config_guess_from_embulk_schema.yml +0 -29
- data/example/config_guess_with_column_options.yml +0 -40
- data/example/config_gzip.yml +0 -1
- data/example/config_jsonl.yml +0 -1
- data/example/config_max_threads.yml +0 -34
- data/example/config_min_ouput_tasks.yml +0 -34
- data/example/config_mode_append.yml +0 -30
- data/example/config_mode_append_direct.yml +0 -30
- data/example/config_nested_record.yml +0 -1
- data/example/config_payload_column.yml +0 -20
- data/example/config_payload_column_index.yml +0 -20
- data/example/config_prevent_duplicate_insert.yml +0 -30
- data/example/config_progress_log_interval.yml +0 -31
- data/example/config_replace.yml +0 -30
- data/example/config_replace_backup.yml +0 -32
- data/example/config_replace_backup_field_partitioned_table.yml +0 -34
- data/example/config_replace_backup_partitioned_table.yml +0 -34
- data/example/config_replace_field_partitioned_table.yml +0 -33
- data/example/config_replace_partitioned_table.yml +0 -33
- data/example/config_replace_schema_update_options.yml +0 -33
- data/example/config_skip_file_generation.yml +0 -32
- data/example/config_table_strftime.yml +0 -30
- data/example/config_template_table.yml +0 -21
- data/example/config_uncompressed.yml +0 -1
- data/example/config_with_rehearsal.yml +0 -33
- data/example/example.csv +0 -17
- data/example/example.yml +0 -1
- data/example/example2_1.csv +0 -1
- data/example/example2_2.csv +0 -1
- data/example/example4_1.csv +0 -1
- data/example/example4_2.csv +0 -1
- data/example/example4_3.csv +0 -1
- data/example/example4_4.csv +0 -1
- data/example/json_key.json +0 -12
- data/example/nested_example.jsonl +0 -16
- data/example/schema.json +0 -30
- data/example/schema_expose_errors.json +0 -30
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz:
-  data.tar.gz:
+SHA1:
+  metadata.gz: 8b3d7d7d675d8428946f81517d1002f667f4fafe
+  data.tar.gz: 25940b93f70492675869d3c4dd50f83f8b7347cf
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 97a2aff66c765f24289717ac79e0a25a6bf31ee3ec5b84b64c96e8573382b31b0a27c30f06692a296b3bfedd70ea9f34f1a451cea7de27d3fa4c61a7502bab98
+  data.tar.gz: b795d47af337e109dfafb9f41a0a720d0eb314c7ba7219193648505ec9dffa3874215b5d311256f625228a4f3e52b73153ee3d694a3d2f88d4c2fd0dd24960b1
data/CHANGELOG.md
CHANGED

@@ -1,3 +1,31 @@
+## 0.6.3 - 2019-10-28
+
+* [enhancement] Add DATE type converter (thanks to @tksfjt1024)
+
+## 0.6.2 - 2019-10-16
+
+* [maintenance] Lock signet and google-api-client versions (thanks to @hiroyuki-sato)
+
+## 0.6.1 - 2019-08-28
+
+* [maintenance] Release a new gem that does not include symlinks, so that it works on Windows.
+
+## 0.6.0 - 2019-08-11
+
+Cleanup `auth_method`:
+
+* [enhancement] Support `auth_method: authorized_user` (OAuth)
+* [incompatibility change] Rename `auth_method: json_key` to `auth_method: service_account` (`json_key` is kept for backward compatibility)
+* [incompatibility change] Remove deprecated `auth_method: private_key` (p12 key)
+* [incompatibility change] Change the default `auth_method` to `application_default` from `private_key` because `private_key` was dropped.
+
+## 0.5.0 - 2019-08-10
+
+* [incompatibility change] Drop deprecated `time_partitioning.require_partition_filter`
+* [incompatibility change] Drop `prevent_duplicate_insert`, which no longer has a use-case
+* [incompatibility change] The `replace`, `replace_backup`, `append`, and `delete_in_advance` modes now require `auto_create_table: true`. Previously these modes created the target table even with `auto_create_table: false`, which confused users. Note that `auto_create_table: true` is required even for a partition (a table name with a partition decorator), which may not need table creation; this keeps the logic and implementation simple.
+* [incompatibility change] Change the default value of `auto_create_table` to `true`, because all modes except `append_direct` now require it.
+
 ## 0.4.14 - 2019-08-10
 
 * [enhancement] Support field partitioning correctly.
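To make the 0.5.0 and 0.6.0 changes above concrete, a minimal 0.6.x config that simply relies on the new defaults could look like the sketch below; the project, dataset and table names are placeholders, and the comments mark the options whose defaults changed:

```yaml
out:
  type: bigquery
  mode: replace                  # replace/replace_backup/append/delete_in_advance now need auto_create_table: true
  # auth_method defaults to application_default since 0.6.0, so it can be omitted here
  # auto_create_table defaults to true since 0.5.0, so it can be omitted here
  project: your-project-000
  dataset: your_dataset_name
  table: your_table_name
```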
data/README.md
CHANGED

@@ -23,34 +23,23 @@ https://developers.google.com/bigquery/loading-data-into-bigquery
 Current version of this plugin supports Google API with Service Account Authentication, but does not support
 OAuth flow for installed applications.
 
-### INCOMPATIBILITY CHANGES
-
-v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGELOG.md) for details.
-
-* `formatter` option (formatter plugin support) is dropped. Use `source_format` option instead. (it already exists in v0.2.x too)
-* `encoders` option (encoder plugin support) is dropped. Use `compression` option instead (it already exists in v0.2.x too).
-* `mode: append` mode now expresses a transactional append, and `mode: append_direct` is one which is not transactional.
-
 ## Configuration
 
 #### Original options
 
 | name | type | required? | default | description |
 |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
-| mode | string | optional | "append" | See [Mode](#mode)
-| auth_method | string | optional | "
-
-
-| json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
-| project | string | required if json_keyfile is not given | | project_id |
+| mode | string | optional | "append" | See [Mode](#mode) |
+| auth_method | string | optional | "application\_default" | See [Authentication](#authentication) |
+| json_keyfile | string | optional | | keyfile path or `content` |
+| project | string | required unless service\_account's `json_keyfile` is given. | | project\_id |
 | dataset | string | required | | dataset |
 | location | string | optional | nil | geographic location of dataset. See [Location](#location) |
 | table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
 | auto_create_dataset | boolean | optional | false | automatically create dataset |
-| auto_create_table | boolean | optional |
+| auto_create_table | boolean | optional | true | `false` is available only for `append_direct` mode. Other modes require `true`. See [Dynamic Table Creating](#dynamic-table-creating) and [Time Partitioning](#time-partitioning) |
 | schema_file | string | optional | | /path/to/schema.json |
 | template_table | string | optional | | template table name. See [Dynamic Table Creating](#dynamic-table-creating) |
-| prevent_duplicate_insert | boolean | optional | false | See [Prevent Duplication](#prevent-duplication) |
 | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
 | job_status_polling_interval | int | optional | 10 sec | Job status polling interval |
 | is_skip_job_result_check | boolean | optional | false | Skip waiting Load job finishes. Available for append, or delete_in_advance mode |
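As the `project` row above notes, the project can be omitted when a `service_account` key file is given, because the plugin reads `project_id` from the key (see the `configure` change further down in this diff). A minimal sketch with placeholder names:

```yaml
out:
  type: bigquery
  auth_method: service_account
  json_keyfile: /path/to/json_keyfile.json   # project is taken from the key's project_id
  dataset: your_dataset_name
  table: your_table_name
```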
@@ -107,7 +96,6 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 | time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
 | time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
 | time_partitioning.field | string | optional | nil | `DATE` or `TIMESTAMP` column used for partitioning |
-| time_partitioning.require_partition_filter | boolean | optional | nil | If true, valid partition filter is required when query |
 | clustering | hash | optional | nil | Currently, clustering is supported for partitioned tables, so must be used with `time_partitioning` option. See [clustered tables](https://cloud.google.com/bigquery/docs/clustered-tables) |
 | clustering.fields | array | required | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
 | schema_update_options | array | optional | nil | (Experimental) List of `ALLOW_FIELD_ADDITION` or `ALLOW_FIELD_RELAXATION` or both. See [jobs#configuration.load.schemaUpdateOptions](https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.schemaUpdateOptions). NOTE for the current status: `schema_update_options` does not work for `copy` job, that is, is not effective for most of modes such as `append`, `replace` and `replace_backup`. `delete_in_advance` deletes origin table so does not need to update schema. Only `append_direct` can utilize schema update. |
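For illustration, the partitioning and clustering options above combine roughly as in this sketch; the column names `timestamp`, `country` and `city` are placeholders, and `clustering` must be used together with `time_partitioning`:

```yaml
out:
  type: bigquery
  mode: replace
  table: table_name
  time_partitioning:
    type: DAY                    # only DAY is supported
    field: timestamp             # top-level DATE or TIMESTAMP column
    expiration_ms: 259200000
  clustering:
    fields:
      - country
      - city
```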
@@ -118,9 +106,8 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 out:
   type: bigquery
   mode: append
-  auth_method:
-
-  p12_keyfile: /path/to/p12_keyfile.p12
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
   project: your-project-000
   dataset: your_dataset_name
   table: your_table_name
@@ -128,7 +115,7 @@ out:
   source_format: NEWLINE_DELIMITED_JSON
 ```
 
-###
+### Location
 
 The geographic location of the dataset. Required except for US and EU.
 
@@ -136,7 +123,7 @@ GCS bucket should be in same region when you use `gcs_bucket`.
 
 See also [Dataset Locations | BigQuery | Google Cloud](https://cloud.google.com/bigquery/docs/dataset-locations)
 
-###
+### Mode
 
 5 modes are provided.
 
@@ -175,53 +162,69 @@ NOTE: BigQuery does not support replacing (actually, copying into) a non-partiti
 
 ### Authentication
 
-There are
+There are four authentication methods
+
+1. `service_account` (or `json_key` for backward compatibility)
+1. `authorized_user`
+1. `compute_engine`
+1. `application_default`
+
+#### service\_account (or json\_key)
 
-
-
-3. Pre-defined access token (Google Compute Engine only)
+Use GCP service account credentials.
+You first need to create a service account, download its json key and deploy the key with embulk.
 
-
+```yaml
+out:
+  type: bigquery
+  auth_method: service_account
+  json_keyfile: /path/to/json_keyfile.json
+```
 
-You
-download its private key and deploy the key with embulk.
+You can also embed contents of `json_keyfile` at config.yml.
 
 ```yaml
 out:
   type: bigquery
-  auth_method:
-
-
+  auth_method: service_account
+  json_keyfile:
+    content: |
+      {
+          "private_key_id": "123456789",
+          "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
+          "client_email": "..."
+      }
 ```
 
-####
+#### authorized\_user
 
-
-
+Use Google user credentials.
+You can get your credentials at `~/.config/gcloud/application_default_credentials.json` by running `gcloud auth login`.
 
 ```yaml
 out:
   type: bigquery
-  auth_method:
-  json_keyfile: /path/to/
+  auth_method: authorized_user
+  json_keyfile: /path/to/credentials.json
 ```
 
-You can also embed contents of json_keyfile at config.yml.
+You can also embed contents of `json_keyfile` at config.yml.
 
 ```yaml
 out:
   type: bigquery
-  auth_method:
+  auth_method: authorized_user
   json_keyfile:
     content: |
       {
-
-
-
-
+          "client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
+          "client_secret":"xxxxxxxxxxx",
+          "refresh_token":"xxxxxxxxxxx",
+          "type":"authorized_user"
+      }
 ```
 
-####
+#### compute\_engine
 
 On the other hand, you don't need to explicitly create a service account for embulk when you
 run embulk in Google Compute Engine. In this third authentication method, you need to
@@ -234,6 +237,22 @@ out:
   auth_method: compute_engine
 ```
 
+#### application\_default
+
+Use Application Default Credentials (ADC). ADC is a strategy to locate Google Cloud Service Account credentials.
+
+1. ADC checks to see if the environment variable `GOOGLE_APPLICATION_CREDENTIALS` is set. If the variable is set, ADC uses the service account file that the variable points to.
+2. ADC checks to see if `~/.config/gcloud/application_default_credentials.json` is located. This file is created by running `gcloud auth application-default login`.
+3. Use the default service account for credentials if the application running on Compute Engine, App Engine, Kubernetes Engine, Cloud Functions or Cloud Run.
+
+See https://cloud.google.com/docs/authentication/production for details.
+
+```yaml
+out:
+  type: bigquery
+  auth_method: application_default
+```
+
 ### Table id formatting
 
 `table` and option accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
@@ -242,21 +261,16 @@ Table ids are formatted at runtime
 using the local time of the embulk server.
 
 For example, with the configuration below,
-data is inserted into tables `
+data is inserted into tables `table_20150503`, `table_20150504` and so on.
 
 ```yaml
 out:
   type: bigquery
-  table: table_%
+  table: table_%Y%m%d
 ```
 
 ### Dynamic table creating
 
-This plugin tries to create a table using BigQuery API when
-
-* mode is either of `delete_in_advance`, `replace`, `replace_backup`, `append`.
-* mode is `append_direct` and `auto_create_table` is true.
-
 There are 3 ways to set schema.
 
 #### Set schema.json
@@ -267,7 +281,7 @@ Please set file path of schema.json.
 out:
   type: bigquery
   auto_create_table: true
-  table: table_%
+  table: table_%Y%m%d
   schema_file: /path/to/schema.json
 ```
 
@@ -279,7 +293,7 @@ Plugin will try to read schema from existing table and use it as schema template
 out:
   type: bigquery
   auto_create_table: true
-  table: table_%
+  table: table_%Y%m%d
   template_table: existing_table_name
 ```
 
@@ -293,17 +307,17 @@ Column options are used to aid guessing BigQuery schema, or to define conversion
 
 - **column_options**: advanced: an array of options for columns
   - **name**: column name
-  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, and `RECORD`. See belows for supported conversion type.
+  - **type**: BigQuery type such as `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE`, and `RECORD`. See belows for supported conversion type.
     - boolean: `BOOLEAN`, `STRING` (default: `BOOLEAN`)
     - long: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `INTEGER`)
     - double: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `FLOAT`)
-    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `RECORD` (default: `STRING`)
-    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP` (default: `TIMESTAMP`)
+    - string: `BOOLEAN`, `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE`, `RECORD` (default: `STRING`)
+    - timestamp: `INTEGER`, `FLOAT`, `STRING`, `TIMESTAMP`, `DATE` (default: `TIMESTAMP`)
     - json: `STRING`, `RECORD` (default: `STRING`)
   - **mode**: BigQuery mode such as `NULLABLE`, `REQUIRED`, and `REPEATED` (string, default: `NULLABLE`)
   - **fields**: Describes the nested schema fields if the type property is set to RECORD. Please note that this is **required** for `RECORD` column.
   - **timestamp_format**: timestamp format to convert into/from `timestamp` (string, default is `default_timestamp_format`)
-  - **timezone**: timezone to convert into/from `timestamp` (string, default is `default_timezone`).
+  - **timezone**: timezone to convert into/from `timestamp`, `date` (string, default is `default_timezone`).
 - **default_timestamp_format**: default timestamp format for column_options (string, default is "%Y-%m-%d %H:%M:%S.%6N")
 - **default_timezone**: default timezone for column_options (string, default is "UTC")
 
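A small sketch of the `DATE` conversion that 0.6.3 adds, using the options listed above; the column name and timezone are placeholders:

```yaml
out:
  type: bigquery
  column_options:
    - name: created_at
      type: DATE                 # convert an Embulk timestamp column into a BigQuery DATE
      timezone: Asia/Tokyo       # applied when converting into the DATE value
  default_timezone: UTC
```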
@@ -355,22 +369,6 @@ out:
   payload_column_index: 0 # or, payload_column: payload
 ```
 
-### Prevent Duplication
-
-`prevent_duplicate_insert` option is used to prevent inserting same data for modes `append` or `append_direct`.
-
-When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generate job ID from md5 hash of file and other options.
-
-`job ID = md5(md5(file) + dataset + table + schema + source_format + file_delimiter + max_bad_records + encoding + ignore_unknown_values + allow_quoted_newlines)`
-
-[job ID must be unique(including failures)](https://cloud.google.com/bigquery/loading-data-into-bigquery#consistency) so that same data can't be inserted with same settings repeatedly.
-
-```yaml
-out:
-  type: bigquery
-  prevent_duplicate_insert: true
-```
-
 ### GCS Bucket
 
 This is useful to reduce number of consumed jobs, which is limited by [100,000 jobs per project per day](https://cloud.google.com/bigquery/quotas#load_jobs).
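A rough sketch of a config using the GCS loading path described here; the bucket name is a placeholder, and the bucket should be in the same region as the dataset (see the Location section):

```yaml
out:
  type: bigquery
  mode: append
  gcs_bucket: your_bucket_name   # files are staged to this bucket and then loaded into BigQuery
  dataset: your_dataset_name
  table: your_table_name
```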
@@ -401,32 +399,31 @@ To load into a partition, specify `table` parameter with a partition decorator a
 out:
   type: bigquery
   table: table_name$20160929
-  auto_create_table: true
 ```
 
-You may configure `time_partitioning` parameter together
+You may configure `time_partitioning` parameter together as:
 
 ```yaml
 out:
   type: bigquery
   table: table_name$20160929
-  auto_create_table: true
   time_partitioning:
     type: DAY
     expiration_ms: 259200000
 ```
 
 You can also create column-based partitioning table as:
+
 ```yaml
 out:
   type: bigquery
   mode: replace
-  auto_create_table: true
   table: table_name
   time_partitioning:
     type: DAY
     field: timestamp
 ```
+
 Note the `time_partitioning.field` should be top-level `DATE` or `TIMESTAMP`.
 
 Use [Tables: patch](https://cloud.google.com/bigquery/docs/reference/v2/tables/patch) API to update the schema of the partitioned table, embulk-output-bigquery itself does not support it, though.
data/embulk-output-bigquery.gemspec
CHANGED

@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.
+  spec.version = "0.6.3"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."

@@ -8,11 +8,18 @@ Gem::Specification.new do |spec|
   spec.licenses = ["MIT"]
   spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
 
-
+  # Exclude example directory which uses symlinks from generating gem.
+  # Symlinks do not work properly on the Windows platform without administrator privilege.
+  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] - Dir["example/*" ]
   spec.test_files = spec.files.grep(%r{^(test|spec)/})
   spec.require_paths = ["lib"]
 
-
+  # TODO
+  # signet 0.12.0 and google-api-client 0.33.0 require >= Ruby 2.4.
+  # Embulk 0.9 use JRuby 9.1.X.Y and It compatible Ruby 2.3.
+  # So, Force install signet < 0.12 and google-api-client < 0.33.0
+  spec.add_dependency 'signet', '~> 0.7', '< 0.12.0'
+  spec.add_dependency 'google-api-client','< 0.33.0'
   spec.add_dependency 'time_with_zone'
 
   spec.add_development_dependency 'bundler', ['>= 1.10.6']
data/lib/embulk/output/bigquery.rb
CHANGED

@@ -23,7 +23,7 @@ module Embulk
       # @return JSON string
       def self.load(v)
        if v.is_a?(String) # path
-          File.read(v)
+          File.read(File.expand_path(v))
        elsif v.is_a?(Hash)
          v['content']
        end
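Since `json_keyfile` paths now go through `File.expand_path`, a home-relative path resolves as expected; a small sketch with a placeholder path:

```yaml
out:
  type: bigquery
  auth_method: service_account
  json_keyfile: ~/.secrets/embulk-bigquery.json   # expanded to an absolute path before it is read
```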
@@ -33,9 +33,7 @@ module Embulk
      def self.configure(config, schema, task_count)
        task = {
          'mode' => config.param('mode', :string, :default => 'append'),
-          'auth_method' => config.param('auth_method', :string, :default => '
-          'service_account_email' => config.param('service_account_email', :string, :default => nil),
-          'p12_keyfile' => config.param('p12_keyfile', :string, :default => nil),
+          'auth_method' => config.param('auth_method', :string, :default => 'application_default'),
          'json_keyfile' => config.param('json_keyfile', LocalFile, :default => nil),
          'project' => config.param('project', :string, :default => nil),
          'dataset' => config.param('dataset', :string),

@@ -45,7 +43,7 @@ module Embulk
          'table_old' => config.param('table_old', :string, :default => nil),
          'table_name_old' => config.param('table_name_old', :string, :default => nil), # lower version compatibility
          'auto_create_dataset' => config.param('auto_create_dataset', :bool, :default => false),
-          'auto_create_table' => config.param('auto_create_table', :bool, :default =>
+          'auto_create_table' => config.param('auto_create_table', :bool, :default => true),
          'schema_file' => config.param('schema_file', :string, :default => nil),
          'template_table' => config.param('template_table', :string, :default => nil),
 
@@ -53,7 +51,6 @@ module Embulk
          'job_status_max_polling_time' => config.param('job_status_max_polling_time', :integer, :default => 3600),
          'job_status_polling_interval' => config.param('job_status_polling_interval', :integer, :default => 10),
          'is_skip_job_result_check' => config.param('is_skip_job_result_check', :bool, :default => false),
-          'prevent_duplicate_insert' => config.param('prevent_duplicate_insert', :bool, :default => false),
          'with_rehearsal' => config.param('with_rehearsal', :bool, :default => false),
          'rehearsal_counts' => config.param('rehearsal_counts', :integer, :default => 1000),
          'abort_on_error' => config.param('abort_on_error', :bool, :default => nil),

@@ -105,10 +102,14 @@ module Embulk
          raise ConfigError.new "`mode` must be one of append, append_direct, replace, delete_in_advance, replace_backup"
        end
 
+        if %w[append replace delete_in_advance replace_backup].include?(task['mode']) and !task['auto_create_table']
+          raise ConfigError.new "`mode: #{task['mode']}` requires `auto_create_table: true`"
+        end
+
        if task['mode'] == 'replace_backup'
          task['table_old'] ||= task['table_name_old'] # for lower version compatibility
          if task['dataset_old'].nil? and task['table_old'].nil?
-            raise ConfigError.new "`mode replace_backup` requires either of `dataset_old` or `table_old`"
+            raise ConfigError.new "`mode: replace_backup` requires either of `dataset_old` or `table_old`"
          end
          task['dataset_old'] ||= task['dataset']
          task['table_old'] ||= task['table']
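With the new check above, a config like the following sketch (placeholder names) is rejected at configure time with "`mode: replace` requires `auto_create_table: true`", since `false` remains valid only for `append_direct`:

```yaml
out:
  type: bigquery
  mode: replace
  auto_create_table: false   # raises ConfigError in 0.5.0+; drop this line or set it to true
  dataset: your_dataset_name
  table: your_table_name
```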
@@ -122,28 +123,21 @@ module Embulk
        end
 
        task['auth_method'] = task['auth_method'].downcase
-        unless %w[
-          raise ConfigError.new "`auth_method` must be one of
-        end
-        if task['auth_method'] == 'private_key' and task['p12_keyfile'].nil?
-          raise ConfigError.new "`p12_keyfile` is required for auth_method private_key"
+        unless %w[json_key service_account authorized_user compute_engine application_default].include?(task['auth_method'])
+          raise ConfigError.new "`auth_method` must be one of service_account (or json_key), authorized_user, compute_engine, application_default"
        end
-        if task['auth_method'] == 'json_key' and task['json_keyfile'].nil?
-          raise ConfigError.new "`json_keyfile` is required for auth_method json_key"
+        if (task['auth_method'] == 'service_account' or task['auth_method'] == 'json_key') and task['json_keyfile'].nil?
+          raise ConfigError.new "`json_keyfile` is required for auth_method: service_account (or json_key)"
        end
 
-        jsonkey_params = nil
        if task['json_keyfile']
          begin
-
+            json_key = JSON.parse(task['json_keyfile'])
+            task['project'] ||= json_key['project_id']
          rescue => e
            raise ConfigError.new "json_keyfile is not a JSON file"
          end
        end
-
-        if jsonkey_params
-          task['project'] ||= jsonkey_params['project_id']
-        end
        if task['project'].nil?
          raise ConfigError.new "Required field \"project\" is not set"
        end
@@ -306,42 +300,18 @@ module Embulk
 
        case task['mode']
        when 'delete_in_advance'
-          bigquery.
+          bigquery.delete_table_or_partition(task['table'])
          bigquery.create_table_if_not_exists(task['table'])
        when 'replace'
          bigquery.create_table_if_not_exists(task['temp_table'])
-
-            if task['auto_create_table']
-              bigquery.create_table_if_not_exists(task['table'])
-            else
-              bigquery.get_table(task['table']) # raises NotFoundError
-            end
-          end
+          bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
        when 'append'
          bigquery.create_table_if_not_exists(task['temp_table'])
-
-            if task['auto_create_table']
-              bigquery.create_table_if_not_exists(task['table'])
-            else
-              bigquery.get_table(task['table']) # raises NotFoundError
-            end
-          end
+          bigquery.create_table_if_not_exists(task['table']) # needs for when task['table'] is a partition
        when 'replace_backup'
          bigquery.create_table_if_not_exists(task['temp_table'])
-
-
-              bigquery.create_table_if_not_exists(task['table'])
-            else
-              bigquery.get_table(task['table']) # raises NotFoundError
-            end
-          end
-          if Helper.has_partition_decorator?(task['table_old'])
-            if task['auto_create_table']
-              bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old'])
-            else
-              bigquery.get_table(task['table_old'], dataset: task['dataset_old']) # raises NotFoundError
-            end
-          end
+          bigquery.create_table_if_not_exists(task['table'])
+          bigquery.create_table_if_not_exists(task['table_old'], dataset: task['dataset_old']) # needs for when a partition
        else # append_direct
          if task['auto_create_table']
            bigquery.create_table_if_not_exists(task['table'])
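Finally, going by the new `delete_table_or_partition` call above, a `delete_in_advance` run against a partition decorator is meant to clear just that partition before loading; a sketch with placeholder names:

```yaml
out:
  type: bigquery
  mode: delete_in_advance
  table: table_name$20160929   # only this partition is targeted, then recreated and loaded
  dataset: your_dataset_name
```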
|