embulk-output-bigquery 0.4.14 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +28 -0
  3. data/README.md +74 -77
  4. data/embulk-output-bigquery.gemspec +10 -3
  5. data/lib/embulk/output/bigquery.rb +19 -49
  6. data/lib/embulk/output/bigquery/auth.rb +35 -0
  7. data/lib/embulk/output/bigquery/bigquery_client.rb +2 -11
  8. data/lib/embulk/output/bigquery/google_client.rb +3 -34
  9. data/lib/embulk/output/bigquery/value_converter_factory.rb +12 -0
  10. data/test/test_bigquery_client.rb +1 -5
  11. data/test/test_configure.rb +10 -19
  12. data/test/test_example.rb +0 -1
  13. data/test/test_helper.rb +4 -1
  14. data/test/test_transaction.rb +22 -62
  15. data/test/test_value_converter_factory.rb +42 -0
  16. metadata +29 -52
  17. data/example/config_append_direct_schema_update_options.yml +0 -31
  18. data/example/config_client_options.yml +0 -33
  19. data/example/config_csv.yml +0 -30
  20. data/example/config_delete_in_advance.yml +0 -29
  21. data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
  22. data/example/config_delete_in_advance_partitioned_table.yml +0 -33
  23. data/example/config_expose_errors.yml +0 -30
  24. data/example/config_gcs.yml +0 -32
  25. data/example/config_guess_from_embulk_schema.yml +0 -29
  26. data/example/config_guess_with_column_options.yml +0 -40
  27. data/example/config_gzip.yml +0 -1
  28. data/example/config_jsonl.yml +0 -1
  29. data/example/config_max_threads.yml +0 -34
  30. data/example/config_min_ouput_tasks.yml +0 -34
  31. data/example/config_mode_append.yml +0 -30
  32. data/example/config_mode_append_direct.yml +0 -30
  33. data/example/config_nested_record.yml +0 -1
  34. data/example/config_payload_column.yml +0 -20
  35. data/example/config_payload_column_index.yml +0 -20
  36. data/example/config_prevent_duplicate_insert.yml +0 -30
  37. data/example/config_progress_log_interval.yml +0 -31
  38. data/example/config_replace.yml +0 -30
  39. data/example/config_replace_backup.yml +0 -32
  40. data/example/config_replace_backup_field_partitioned_table.yml +0 -34
  41. data/example/config_replace_backup_partitioned_table.yml +0 -34
  42. data/example/config_replace_field_partitioned_table.yml +0 -33
  43. data/example/config_replace_partitioned_table.yml +0 -33
  44. data/example/config_replace_schema_update_options.yml +0 -33
  45. data/example/config_skip_file_generation.yml +0 -32
  46. data/example/config_table_strftime.yml +0 -30
  47. data/example/config_template_table.yml +0 -21
  48. data/example/config_uncompressed.yml +0 -1
  49. data/example/config_with_rehearsal.yml +0 -33
  50. data/example/example.csv +0 -17
  51. data/example/example.yml +0 -1
  52. data/example/example2_1.csv +0 -1
  53. data/example/example2_2.csv +0 -1
  54. data/example/example4_1.csv +0 -1
  55. data/example/example4_2.csv +0 -1
  56. data/example/example4_3.csv +0 -1
  57. data/example/example4_4.csv +0 -1
  58. data/example/json_key.json +0 -12
  59. data/example/nested_example.jsonl +0 -16
  60. data/example/schema.json +0 -30
  61. data/example/schema_expose_errors.json +0 -30
@@ -1,29 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- compression: GZIP
27
- source_format: NEWLINE_DELIMITED_JSON
28
- auto_create_dataset: true
29
- auto_create_table: true
@@ -1,40 +0,0 @@
1
- # embulk gem install embulk-parser-jsonl
2
- in:
3
- type: file
4
- path_prefix: example/nested_example.jsonl
5
- parser:
6
- type: jsonl
7
- columns:
8
- - {name: date, type: string}
9
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
10
- - {name: "null", type: string}
11
- - {name: long, type: long}
12
- - {name: string, type: string}
13
- - {name: double, type: double}
14
- - {name: json, type: json}
15
- - {name: boolean, type: boolean}
16
- out:
17
- type: bigquery
18
- mode: replace
19
- auth_method: json_key
20
- json_keyfile: example/your-project-000.json
21
- dataset: your_dataset_name
22
- table: your_table_name
23
- compression: GZIP
24
- source_format: NEWLINE_DELIMITED_JSON
25
- auto_create_dataset: true
26
- auto_create_table: true
27
- column_options:
28
- - {name: date, type: TIMESTAMP, timestamp_format: "%Y-%m-%d", timezone: "+09:00"}
29
- - {name: timestamp, type: STRING, timestamp_format: "%Y-%m-%d", timezone: "+09:00"}
30
- - {name: long, type: STRING}
31
- - {name: string, type: STRING}
32
- - {name: double, type: STRING}
33
- - {name: boolean, type: STRING}
34
- - name: json
35
- type: RECORD
36
- fields:
37
- - {name: k1, type: STRING}
38
- - {name: k2, type: STRING}
39
- # 2015-07-13
40
- # 2015-07-12 15:00:00
@@ -1 +0,0 @@
1
- example/config_csv.yml
@@ -1 +0,0 @@
1
- example/config_replace.yml
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example4_
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- exec:
32
- type: local
33
- min_output_tasks: 2
34
- max_threads: 2
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example2_
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: GZIP
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- exec:
32
- type: local
33
- min_output_tasks: 8
34
- max_threads: 4
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- null_string: 'NULL'
8
- skip_header_lines: 1
9
- comment_line_marker: '#'
10
- columns:
11
- - {name: date, type: string}
12
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
13
- - {name: "null", type: string}
14
- - {name: long, type: long}
15
- - {name: string, type: string}
16
- - {name: double, type: double}
17
- - {name: boolean, type: boolean}
18
- out:
19
- type: bigquery
20
- mode: append
21
- auth_method: json_key
22
- json_keyfile: example/your-project-000.json
23
- dataset: your_dataset_name
24
- table: your_table_name
25
- compression: GZIP
26
- source_format: CSV
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- delete_from_local_when_job_end: false
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- null_string: 'NULL'
8
- skip_header_lines: 1
9
- comment_line_marker: '#'
10
- columns:
11
- - {name: date, type: string}
12
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
13
- - {name: "null", type: string}
14
- - {name: long, type: long}
15
- - {name: string, type: string}
16
- - {name: double, type: double}
17
- - {name: boolean, type: boolean}
18
- out:
19
- type: bigquery
20
- mode: append_direct
21
- auth_method: json_key
22
- json_keyfile: example/your-project-000.json
23
- dataset: your_dataset_name
24
- table: your_table_name
25
- compression: GZIP
26
- source_format: CSV
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- delete_from_local_when_job_end: false
@@ -1 +0,0 @@
1
- example/config_guess_with_column_options.yml
@@ -1,20 +0,0 @@
1
- # embulk gem install embulk-parser-none
2
- in:
3
- type: file
4
- path_prefix: example/example.jsonl
5
- parser:
6
- type: none
7
- column_name: payload
8
- out:
9
- type: bigquery
10
- mode: replace
11
- auth_method: json_key
12
- json_keyfile: example/your-project-000.json
13
- dataset: your_dataset_name
14
- table: your_table_name
15
- compression: GZIP
16
- source_format: NEWLINE_DELIMITED_JSON
17
- auto_create_dataset: true
18
- auto_create_table: true
19
- schema_file: example/schema.json
20
- payload_column: payload
@@ -1,20 +0,0 @@
1
- # embulk gem install embulk-parser-none
2
- in:
3
- type: file
4
- path_prefix: example/example.jsonl
5
- parser:
6
- type: none
7
- column_name: payload
8
- out:
9
- type: bigquery
10
- mode: replace
11
- auth_method: json_key
12
- json_keyfile: example/your-project-000.json
13
- dataset: your_dataset_name
14
- table: your_table_name
15
- compression: GZIP
16
- source_format: NEWLINE_DELIMITED_JSON
17
- auto_create_dataset: true
18
- auto_create_table: true
19
- schema_file: example/schema.json
20
- payload_column_index: 0
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: append
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- prevent_duplicate_insert: true
@@ -1,31 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- progress_log_interval: 0.1
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
@@ -1,32 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace_backup
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- dataset_old: your_dataset_name_old
27
- table_old: your_table_name_old
28
- source_format: NEWLINE_DELIMITED_JSON
29
- auto_create_dataset: true
30
- auto_create_table: true
31
- schema_file: example/schema.json
32
- skip_load: true # for debug
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace_backup
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_field_partitioned_table_name
26
- table_old: your_field_partitioned_table_name_old
27
- source_format: NEWLINE_DELIMITED_JSON
28
- compression: NONE
29
- auto_create_dataset: true
30
- auto_create_table: true
31
- schema_file: example/schema.json
32
- time_partitioning:
33
- type: 'DAY'
34
- field: 'timestamp'
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace_backup
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_partitioned_table_name$20160929
26
- table_old: your_partitioned_table_name_old$20160929
27
- source_format: NEWLINE_DELIMITED_JSON
28
- compression: NONE
29
- auto_create_dataset: true
30
- auto_create_table: true
31
- schema_file: example/schema.json
32
- time_partitioning:
33
- type: 'DAY'
34
- expiration_ms: 100