RubyGems - embulk-output-bigquery - Versions diffs - 0.4.14 → 0.6.3 - Mend

embulk-output-bigquery 0.4.14 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61)

checksums.yaml +5 -5
data/CHANGELOG.md +28 -0
data/README.md +74 -77
data/embulk-output-bigquery.gemspec +10 -3
data/lib/embulk/output/bigquery.rb +19 -49
data/lib/embulk/output/bigquery/auth.rb +35 -0
data/lib/embulk/output/bigquery/bigquery_client.rb +2 -11
data/lib/embulk/output/bigquery/google_client.rb +3 -34
data/lib/embulk/output/bigquery/value_converter_factory.rb +12 -0
data/test/test_bigquery_client.rb +1 -5
data/test/test_configure.rb +10 -19
data/test/test_example.rb +0 -1
data/test/test_helper.rb +4 -1
data/test/test_transaction.rb +22 -62
data/test/test_value_converter_factory.rb +42 -0
metadata +29 -52
data/example/config_append_direct_schema_update_options.yml +0 -31
data/example/config_client_options.yml +0 -33
data/example/config_csv.yml +0 -30
data/example/config_delete_in_advance.yml +0 -29
data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
data/example/config_delete_in_advance_partitioned_table.yml +0 -33
data/example/config_expose_errors.yml +0 -30
data/example/config_gcs.yml +0 -32
data/example/config_guess_from_embulk_schema.yml +0 -29
data/example/config_guess_with_column_options.yml +0 -40
data/example/config_gzip.yml +0 -1
data/example/config_jsonl.yml +0 -1
data/example/config_max_threads.yml +0 -34
data/example/config_min_ouput_tasks.yml +0 -34
data/example/config_mode_append.yml +0 -30
data/example/config_mode_append_direct.yml +0 -30
data/example/config_nested_record.yml +0 -1
data/example/config_payload_column.yml +0 -20
data/example/config_payload_column_index.yml +0 -20
data/example/config_prevent_duplicate_insert.yml +0 -30
data/example/config_progress_log_interval.yml +0 -31
data/example/config_replace.yml +0 -30
data/example/config_replace_backup.yml +0 -32
data/example/config_replace_backup_field_partitioned_table.yml +0 -34
data/example/config_replace_backup_partitioned_table.yml +0 -34
data/example/config_replace_field_partitioned_table.yml +0 -33
data/example/config_replace_partitioned_table.yml +0 -33
data/example/config_replace_schema_update_options.yml +0 -33
data/example/config_skip_file_generation.yml +0 -32
data/example/config_table_strftime.yml +0 -30
data/example/config_template_table.yml +0 -21
data/example/config_uncompressed.yml +0 -1
data/example/config_with_rehearsal.yml +0 -33
data/example/example.csv +0 -17
data/example/example.yml +0 -1
data/example/example2_1.csv +0 -1
data/example/example2_2.csv +0 -1
data/example/example4_1.csv +0 -1
data/example/example4_2.csv +0 -1
data/example/example4_3.csv +0 -1
data/example/example4_4.csv +0 -1
data/example/json_key.json +0 -12
data/example/nested_example.jsonl +0 -16
data/example/schema.json +0 -30
data/example/schema_expose_errors.json +0 -30

data/example/config_guess_from_embulk_schema.yml DELETED Viewed

@@ -1,29 +0,0 @@
-in:
-  type: file
-  path_prefix: example/example.csv
-  parser:
-    type: csv
-    charset: UTF-8
-    newline: CRLF
-    null_string: 'NULL'
-    skip_header_lines: 1
-    comment_line_marker: '#'
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: replace
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  compression: GZIP
-  source_format: NEWLINE_DELIMITED_JSON
-  auto_create_dataset: true
-  auto_create_table: true

data/example/config_guess_with_column_options.yml DELETED Viewed

@@ -1,40 +0,0 @@
-# embulk gem install embulk-parser-jsonl
-in:
-  type: file
-  path_prefix: example/nested_example.jsonl
-  parser:
-    type: jsonl
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: json,        type: json}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: replace
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  compression: GZIP
-  source_format: NEWLINE_DELIMITED_JSON
-  auto_create_dataset: true
-  auto_create_table: true
-  column_options:
-    - {name: date,        type: TIMESTAMP, timestamp_format: "%Y-%m-%d", timezone: "+09:00"}
-    - {name: timestamp,   type: STRING,    timestamp_format: "%Y-%m-%d", timezone: "+09:00"}
-    - {name: long,        type: STRING}
-    - {name: string,      type: STRING}
-    - {name: double,      type: STRING}
-    - {name: boolean,     type: STRING}
-    - name: json
-      type: RECORD
-      fields:
-        - {name: k1,      type: STRING}
-        - {name: k2,      type: STRING}
-# 2015-07-13
-# 2015-07-12 15:00:00

data/example/config_gzip.yml DELETED Viewed

@@ -1 +0,0 @@
-example/config_csv.yml

data/example/config_jsonl.yml DELETED Viewed

@@ -1 +0,0 @@
-example/config_replace.yml

data/example/config_max_threads.yml DELETED Viewed

@@ -1,34 +0,0 @@
-in:
-  type: file
-  path_prefix: example/example4_
-  parser:
-    type: csv
-    charset: UTF-8
-    newline: CRLF
-    null_string: 'NULL'
-    skip_header_lines: 1
-    comment_line_marker: '#'
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: replace
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  source_format: NEWLINE_DELIMITED_JSON
-  compression: NONE
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json
-exec:
-  type: local
-  min_output_tasks: 2
-  max_threads: 2

data/example/config_min_ouput_tasks.yml DELETED Viewed

@@ -1,34 +0,0 @@
-in:
-  type: file
-  path_prefix: example/example2_
-  parser:
-    type: csv
-    charset: UTF-8
-    newline: CRLF
-    null_string: 'NULL'
-    skip_header_lines: 1
-    comment_line_marker: '#'
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: replace
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  source_format: NEWLINE_DELIMITED_JSON
-  compression: GZIP
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json
-exec:
-  type: local
-  min_output_tasks: 8
-  max_threads: 4

data/example/config_mode_append.yml DELETED Viewed

@@ -1,30 +0,0 @@
-in:
-  type: file
-  path_prefix: example/example.csv
-  parser:
-    type: csv
-    charset: UTF-8
-    null_string: 'NULL'
-    skip_header_lines: 1
-    comment_line_marker: '#'
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: append
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  compression: GZIP
-  source_format: CSV
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json
-  delete_from_local_when_job_end: false

data/example/config_mode_append_direct.yml DELETED Viewed

@@ -1,30 +0,0 @@
-in:
-  type: file
-  path_prefix: example/example.csv
-  parser:
-    type: csv
-    charset: UTF-8
-    null_string: 'NULL'
-    skip_header_lines: 1
-    comment_line_marker: '#'
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: append_direct
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  compression: GZIP
-  source_format: CSV
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json
-  delete_from_local_when_job_end: false

data/example/config_nested_record.yml DELETED Viewed

@@ -1 +0,0 @@
-example/config_guess_with_column_options.yml

data/example/config_payload_column.yml DELETED Viewed

@@ -1,20 +0,0 @@
-# embulk gem install embulk-parser-none
-in:
-  type: file
-  path_prefix: example/example.jsonl
-  parser:
-    type: none
-    column_name: payload
-out:
-  type: bigquery
-  mode: replace
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  compression: GZIP
-  source_format: NEWLINE_DELIMITED_JSON
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json
-  payload_column: payload

data/example/config_payload_column_index.yml DELETED Viewed

@@ -1,20 +0,0 @@
-# embulk gem install embulk-parser-none
-in:
-  type: file
-  path_prefix: example/example.jsonl
-  parser:
-    type: none
-    column_name: payload
-out:
-  type: bigquery
-  mode: replace
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  compression: GZIP
-  source_format: NEWLINE_DELIMITED_JSON
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json
-  payload_column_index: 0

data/example/config_prevent_duplicate_insert.yml DELETED Viewed

@@ -1,30 +0,0 @@
-in:
-  type: file
-  path_prefix: example/example.csv
-  parser:
-    type: csv
-    charset: UTF-8
-    newline: CRLF
-    null_string: 'NULL'
-    skip_header_lines: 1
-    comment_line_marker: '#'
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: append
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  source_format: NEWLINE_DELIMITED_JSON
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json
-  prevent_duplicate_insert: true

data/example/config_progress_log_interval.yml DELETED Viewed

@@ -1,31 +0,0 @@
-in:
-  type: file
-  path_prefix: example/example.csv
-  parser:
-    type: csv
-    charset: UTF-8
-    newline: CRLF
-    null_string: 'NULL'
-    skip_header_lines: 1
-    comment_line_marker: '#'
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: replace
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  source_format: NEWLINE_DELIMITED_JSON
-  compression: NONE
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json
-  progress_log_interval: 0.1

data/example/config_replace.yml DELETED Viewed

@@ -1,30 +0,0 @@
-in:
-  type: file
-  path_prefix: example/example.csv
-  parser:
-    type: csv
-    charset: UTF-8
-    newline: CRLF
-    null_string: 'NULL'
-    skip_header_lines: 1
-    comment_line_marker: '#'
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: replace
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  source_format: NEWLINE_DELIMITED_JSON
-  compression: NONE
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json

data/example/config_replace_backup.yml DELETED Viewed

@@ -1,32 +0,0 @@
-in:
-  type: file
-  path_prefix: example/example.csv
-  parser:
-    type: csv
-    charset: UTF-8
-    newline: CRLF
-    null_string: 'NULL'
-    skip_header_lines: 1
-    comment_line_marker: '#'
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: replace_backup
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_table_name
-  dataset_old: your_dataset_name_old
-  table_old: your_table_name_old
-  source_format: NEWLINE_DELIMITED_JSON
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json
-  skip_load: true # for debug

data/example/config_replace_backup_field_partitioned_table.yml DELETED Viewed

@@ -1,34 +0,0 @@
-in:
-  type: file
-  path_prefix: example/example.csv
-  parser:
-    type: csv
-    charset: UTF-8
-    newline: CRLF
-    null_string: 'NULL'
-    skip_header_lines: 1
-    comment_line_marker: '#'
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: replace_backup
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_field_partitioned_table_name
-  table_old: your_field_partitioned_table_name_old
-  source_format: NEWLINE_DELIMITED_JSON
-  compression: NONE
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json
-  time_partitioning:
-    type: 'DAY'
-    field: 'timestamp'

data/example/config_replace_backup_partitioned_table.yml DELETED Viewed

@@ -1,34 +0,0 @@
-in:
-  type: file
-  path_prefix: example/example.csv
-  parser:
-    type: csv
-    charset: UTF-8
-    newline: CRLF
-    null_string: 'NULL'
-    skip_header_lines: 1
-    comment_line_marker: '#'
-    columns:
-      - {name: date,        type: string}
-      - {name: timestamp,   type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
-      - {name: "null",      type: string}
-      - {name: long,        type: long}
-      - {name: string,      type: string}
-      - {name: double,      type: double}
-      - {name: boolean,     type: boolean}
-out:
-  type: bigquery
-  mode: replace_backup
-  auth_method: json_key
-  json_keyfile: example/your-project-000.json
-  dataset: your_dataset_name
-  table: your_partitioned_table_name$20160929
-  table_old: your_partitioned_table_name_old$20160929
-  source_format: NEWLINE_DELIMITED_JSON
-  compression: NONE
-  auto_create_dataset: true
-  auto_create_table: true
-  schema_file: example/schema.json
-  time_partitioning:
-    type: 'DAY'
-    expiration_ms: 100