embulk-output-bigquery 0.4.14 → 0.6.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +28 -0
  3. data/README.md +74 -77
  4. data/embulk-output-bigquery.gemspec +10 -3
  5. data/lib/embulk/output/bigquery.rb +19 -49
  6. data/lib/embulk/output/bigquery/auth.rb +35 -0
  7. data/lib/embulk/output/bigquery/bigquery_client.rb +2 -11
  8. data/lib/embulk/output/bigquery/google_client.rb +3 -34
  9. data/lib/embulk/output/bigquery/value_converter_factory.rb +12 -0
  10. data/test/test_bigquery_client.rb +1 -5
  11. data/test/test_configure.rb +10 -19
  12. data/test/test_example.rb +0 -1
  13. data/test/test_helper.rb +4 -1
  14. data/test/test_transaction.rb +22 -62
  15. data/test/test_value_converter_factory.rb +42 -0
  16. metadata +29 -52
  17. data/example/config_append_direct_schema_update_options.yml +0 -31
  18. data/example/config_client_options.yml +0 -33
  19. data/example/config_csv.yml +0 -30
  20. data/example/config_delete_in_advance.yml +0 -29
  21. data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
  22. data/example/config_delete_in_advance_partitioned_table.yml +0 -33
  23. data/example/config_expose_errors.yml +0 -30
  24. data/example/config_gcs.yml +0 -32
  25. data/example/config_guess_from_embulk_schema.yml +0 -29
  26. data/example/config_guess_with_column_options.yml +0 -40
  27. data/example/config_gzip.yml +0 -1
  28. data/example/config_jsonl.yml +0 -1
  29. data/example/config_max_threads.yml +0 -34
  30. data/example/config_min_ouput_tasks.yml +0 -34
  31. data/example/config_mode_append.yml +0 -30
  32. data/example/config_mode_append_direct.yml +0 -30
  33. data/example/config_nested_record.yml +0 -1
  34. data/example/config_payload_column.yml +0 -20
  35. data/example/config_payload_column_index.yml +0 -20
  36. data/example/config_prevent_duplicate_insert.yml +0 -30
  37. data/example/config_progress_log_interval.yml +0 -31
  38. data/example/config_replace.yml +0 -30
  39. data/example/config_replace_backup.yml +0 -32
  40. data/example/config_replace_backup_field_partitioned_table.yml +0 -34
  41. data/example/config_replace_backup_partitioned_table.yml +0 -34
  42. data/example/config_replace_field_partitioned_table.yml +0 -33
  43. data/example/config_replace_partitioned_table.yml +0 -33
  44. data/example/config_replace_schema_update_options.yml +0 -33
  45. data/example/config_skip_file_generation.yml +0 -32
  46. data/example/config_table_strftime.yml +0 -30
  47. data/example/config_template_table.yml +0 -21
  48. data/example/config_uncompressed.yml +0 -1
  49. data/example/config_with_rehearsal.yml +0 -33
  50. data/example/example.csv +0 -17
  51. data/example/example.yml +0 -1
  52. data/example/example2_1.csv +0 -1
  53. data/example/example2_2.csv +0 -1
  54. data/example/example4_1.csv +0 -1
  55. data/example/example4_2.csv +0 -1
  56. data/example/example4_3.csv +0 -1
  57. data/example/example4_4.csv +0 -1
  58. data/example/json_key.json +0 -12
  59. data/example/nested_example.jsonl +0 -16
  60. data/example/schema.json +0 -30
  61. data/example/schema_expose_errors.json +0 -30
@@ -1,29 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- compression: GZIP
27
- source_format: NEWLINE_DELIMITED_JSON
28
- auto_create_dataset: true
29
- auto_create_table: true
@@ -1,40 +0,0 @@
1
- # embulk gem install embulk-parser-jsonl
2
- in:
3
- type: file
4
- path_prefix: example/nested_example.jsonl
5
- parser:
6
- type: jsonl
7
- columns:
8
- - {name: date, type: string}
9
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
10
- - {name: "null", type: string}
11
- - {name: long, type: long}
12
- - {name: string, type: string}
13
- - {name: double, type: double}
14
- - {name: json, type: json}
15
- - {name: boolean, type: boolean}
16
- out:
17
- type: bigquery
18
- mode: replace
19
- auth_method: json_key
20
- json_keyfile: example/your-project-000.json
21
- dataset: your_dataset_name
22
- table: your_table_name
23
- compression: GZIP
24
- source_format: NEWLINE_DELIMITED_JSON
25
- auto_create_dataset: true
26
- auto_create_table: true
27
- column_options:
28
- - {name: date, type: TIMESTAMP, timestamp_format: "%Y-%m-%d", timezone: "+09:00"}
29
- - {name: timestamp, type: STRING, timestamp_format: "%Y-%m-%d", timezone: "+09:00"}
30
- - {name: long, type: STRING}
31
- - {name: string, type: STRING}
32
- - {name: double, type: STRING}
33
- - {name: boolean, type: STRING}
34
- - name: json
35
- type: RECORD
36
- fields:
37
- - {name: k1, type: STRING}
38
- - {name: k2, type: STRING}
39
- # 2015-07-13
40
- # 2015-07-12 15:00:00
@@ -1 +0,0 @@
1
- example/config_csv.yml
@@ -1 +0,0 @@
1
- example/config_replace.yml
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example4_
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- exec:
32
- type: local
33
- min_output_tasks: 2
34
- max_threads: 2
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example2_
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: GZIP
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- exec:
32
- type: local
33
- min_output_tasks: 8
34
- max_threads: 4
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- null_string: 'NULL'
8
- skip_header_lines: 1
9
- comment_line_marker: '#'
10
- columns:
11
- - {name: date, type: string}
12
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
13
- - {name: "null", type: string}
14
- - {name: long, type: long}
15
- - {name: string, type: string}
16
- - {name: double, type: double}
17
- - {name: boolean, type: boolean}
18
- out:
19
- type: bigquery
20
- mode: append
21
- auth_method: json_key
22
- json_keyfile: example/your-project-000.json
23
- dataset: your_dataset_name
24
- table: your_table_name
25
- compression: GZIP
26
- source_format: CSV
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- delete_from_local_when_job_end: false
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- null_string: 'NULL'
8
- skip_header_lines: 1
9
- comment_line_marker: '#'
10
- columns:
11
- - {name: date, type: string}
12
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
13
- - {name: "null", type: string}
14
- - {name: long, type: long}
15
- - {name: string, type: string}
16
- - {name: double, type: double}
17
- - {name: boolean, type: boolean}
18
- out:
19
- type: bigquery
20
- mode: append_direct
21
- auth_method: json_key
22
- json_keyfile: example/your-project-000.json
23
- dataset: your_dataset_name
24
- table: your_table_name
25
- compression: GZIP
26
- source_format: CSV
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- delete_from_local_when_job_end: false
@@ -1 +0,0 @@
1
- example/config_guess_with_column_options.yml
@@ -1,20 +0,0 @@
1
- # embulk gem install embulk-parser-none
2
- in:
3
- type: file
4
- path_prefix: example/example.jsonl
5
- parser:
6
- type: none
7
- column_name: payload
8
- out:
9
- type: bigquery
10
- mode: replace
11
- auth_method: json_key
12
- json_keyfile: example/your-project-000.json
13
- dataset: your_dataset_name
14
- table: your_table_name
15
- compression: GZIP
16
- source_format: NEWLINE_DELIMITED_JSON
17
- auto_create_dataset: true
18
- auto_create_table: true
19
- schema_file: example/schema.json
20
- payload_column: payload
@@ -1,20 +0,0 @@
1
- # embulk gem install embulk-parser-none
2
- in:
3
- type: file
4
- path_prefix: example/example.jsonl
5
- parser:
6
- type: none
7
- column_name: payload
8
- out:
9
- type: bigquery
10
- mode: replace
11
- auth_method: json_key
12
- json_keyfile: example/your-project-000.json
13
- dataset: your_dataset_name
14
- table: your_table_name
15
- compression: GZIP
16
- source_format: NEWLINE_DELIMITED_JSON
17
- auto_create_dataset: true
18
- auto_create_table: true
19
- schema_file: example/schema.json
20
- payload_column_index: 0
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: append
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- prevent_duplicate_insert: true
@@ -1,31 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- progress_log_interval: 0.1
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
@@ -1,32 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace_backup
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- dataset_old: your_dataset_name_old
27
- table_old: your_table_name_old
28
- source_format: NEWLINE_DELIMITED_JSON
29
- auto_create_dataset: true
30
- auto_create_table: true
31
- schema_file: example/schema.json
32
- skip_load: true # for debug
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace_backup
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_field_partitioned_table_name
26
- table_old: your_field_partitioned_table_name_old
27
- source_format: NEWLINE_DELIMITED_JSON
28
- compression: NONE
29
- auto_create_dataset: true
30
- auto_create_table: true
31
- schema_file: example/schema.json
32
- time_partitioning:
33
- type: 'DAY'
34
- field: 'timestamp'
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace_backup
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_partitioned_table_name$20160929
26
- table_old: your_partitioned_table_name_old$20160929
27
- source_format: NEWLINE_DELIMITED_JSON
28
- compression: NONE
29
- auto_create_dataset: true
30
- auto_create_table: true
31
- schema_file: example/schema.json
32
- time_partitioning:
33
- type: 'DAY'
34
- expiration_ms: 100