embulk-output-bigquery 0.6.0 → 0.6.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -3
  3. data/Gemfile +1 -1
  4. data/README.md +16 -11
  5. data/embulk-output-bigquery.gemspec +15 -3
  6. data/lib/embulk/output/bigquery.rb +3 -3
  7. data/lib/embulk/output/bigquery/value_converter_factory.rb +31 -0
  8. data/test/test_helper.rb +7 -1
  9. data/test/test_value_converter_factory.rb +86 -0
  10. metadata +70 -60
  11. data/example/config_append_direct_schema_update_options.yml +0 -31
  12. data/example/config_client_options.yml +0 -33
  13. data/example/config_csv.yml +0 -30
  14. data/example/config_delete_in_advance.yml +0 -29
  15. data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
  16. data/example/config_delete_in_advance_partitioned_table.yml +0 -33
  17. data/example/config_expose_errors.yml +0 -30
  18. data/example/config_gcs.yml +0 -32
  19. data/example/config_guess_from_embulk_schema.yml +0 -29
  20. data/example/config_guess_with_column_options.yml +0 -40
  21. data/example/config_gzip.yml +0 -1
  22. data/example/config_jsonl.yml +0 -1
  23. data/example/config_max_threads.yml +0 -34
  24. data/example/config_min_ouput_tasks.yml +0 -34
  25. data/example/config_mode_append.yml +0 -30
  26. data/example/config_mode_append_direct.yml +0 -30
  27. data/example/config_nested_record.yml +0 -1
  28. data/example/config_payload_column.yml +0 -20
  29. data/example/config_payload_column_index.yml +0 -20
  30. data/example/config_progress_log_interval.yml +0 -31
  31. data/example/config_replace.yml +0 -30
  32. data/example/config_replace_backup.yml +0 -32
  33. data/example/config_replace_backup_field_partitioned_table.yml +0 -34
  34. data/example/config_replace_backup_partitioned_table.yml +0 -34
  35. data/example/config_replace_field_partitioned_table.yml +0 -33
  36. data/example/config_replace_partitioned_table.yml +0 -33
  37. data/example/config_replace_schema_update_options.yml +0 -33
  38. data/example/config_skip_file_generation.yml +0 -32
  39. data/example/config_table_strftime.yml +0 -30
  40. data/example/config_template_table.yml +0 -21
  41. data/example/config_uncompressed.yml +0 -1
  42. data/example/config_with_rehearsal.yml +0 -33
  43. data/example/example.csv +0 -17
  44. data/example/example.yml +0 -1
  45. data/example/example2_1.csv +0 -1
  46. data/example/example2_2.csv +0 -1
  47. data/example/example4_1.csv +0 -1
  48. data/example/example4_2.csv +0 -1
  49. data/example/example4_3.csv +0 -1
  50. data/example/example4_4.csv +0 -1
  51. data/example/json_key.json +0 -12
  52. data/example/nested_example.jsonl +0 -16
  53. data/example/schema.json +0 -30
  54. data/example/schema_expose_errors.json +0 -30
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- null_string: 'NULL'
8
- skip_header_lines: 1
9
- comment_line_marker: '#'
10
- columns:
11
- - {name: date, type: string}
12
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
13
- - {name: "null", type: string}
14
- - {name: long, type: long}
15
- - {name: string, type: string}
16
- - {name: double, type: double}
17
- - {name: boolean, type: boolean}
18
- out:
19
- type: bigquery
20
- mode: append_direct
21
- auth_method: service_account
22
- json_keyfile: example/your-project-000.json
23
- dataset: your_dataset_name
24
- table: your_table_name
25
- compression: GZIP
26
- source_format: CSV
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- delete_from_local_when_job_end: false
@@ -1 +0,0 @@
1
- example/config_guess_with_column_options.yml
@@ -1,20 +0,0 @@
1
- # embulk gem install embulk-parser-none
2
- in:
3
- type: file
4
- path_prefix: example/example.jsonl
5
- parser:
6
- type: none
7
- column_name: payload
8
- out:
9
- type: bigquery
10
- mode: replace
11
- auth_method: service_account
12
- json_keyfile: example/your-project-000.json
13
- dataset: your_dataset_name
14
- table: your_table_name
15
- compression: GZIP
16
- source_format: NEWLINE_DELIMITED_JSON
17
- auto_create_dataset: true
18
- auto_create_table: true
19
- schema_file: example/schema.json
20
- payload_column: payload
@@ -1,20 +0,0 @@
1
- # embulk gem install embulk-parser-none
2
- in:
3
- type: file
4
- path_prefix: example/example.jsonl
5
- parser:
6
- type: none
7
- column_name: payload
8
- out:
9
- type: bigquery
10
- mode: replace
11
- auth_method: service_account
12
- json_keyfile: example/your-project-000.json
13
- dataset: your_dataset_name
14
- table: your_table_name
15
- compression: GZIP
16
- source_format: NEWLINE_DELIMITED_JSON
17
- auto_create_dataset: true
18
- auto_create_table: true
19
- schema_file: example/schema.json
20
- payload_column_index: 0
@@ -1,31 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: service_account
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- progress_log_interval: 0.1
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: service_account
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
@@ -1,32 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace_backup
22
- auth_method: service_account
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- dataset_old: your_dataset_name_old
27
- table_old: your_table_name_old
28
- source_format: NEWLINE_DELIMITED_JSON
29
- auto_create_dataset: true
30
- auto_create_table: true
31
- schema_file: example/schema.json
32
- skip_load: true # for debug
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace_backup
22
- auth_method: service_account
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_field_partitioned_table_name
26
- table_old: your_field_partitioned_table_name_old
27
- source_format: NEWLINE_DELIMITED_JSON
28
- compression: NONE
29
- auto_create_dataset: true
30
- auto_create_table: true
31
- schema_file: example/schema.json
32
- time_partitioning:
33
- type: 'DAY'
34
- field: 'timestamp'
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace_backup
22
- auth_method: service_account
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_partitioned_table_name$20160929
26
- table_old: your_partitioned_table_name_old$20160929
27
- source_format: NEWLINE_DELIMITED_JSON
28
- compression: NONE
29
- auto_create_dataset: true
30
- auto_create_table: true
31
- schema_file: example/schema.json
32
- time_partitioning:
33
- type: 'DAY'
34
- expiration_ms: 100
@@ -1,33 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: service_account
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_field_partitioned_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- time_partitioning:
32
- type: 'DAY'
33
- field: 'timestamp'
@@ -1,33 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: service_account
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_partitioned_table_name$20160929
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- time_partitioning:
32
- type: 'DAY'
33
- expiration_ms: 100
@@ -1,33 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: service_account
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_partitioned_table_name$20160929
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- time_partitioning:
32
- type: 'DAY'
33
- expiration_ms: 100
@@ -1,32 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: service_account
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- path_prefix: example/example
31
- file_ext: .jsonl
32
- skip_file_generation: true
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: service_account
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name_%Y%m%d
26
- source_format: NEWLINE_DELIMITED_JSON
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- skip_load: true # for debug
@@ -1,21 +0,0 @@
1
- # embulk gem install embulk-parser-none
2
- in:
3
- type: file
4
- path_prefix: example/example.jsonl
5
- parser:
6
- type: none
7
- column_name: payload
8
- out:
9
- type: bigquery
10
- mode: replace
11
- auth_method: service_account
12
- json_keyfile: example/your-project-000.json
13
- dataset: your_dataset_name
14
- table: your_table_name_%Y%m%d
15
- compression: GZIP
16
- source_format: NEWLINE_DELIMITED_JSON
17
- auto_create_dataset: true
18
- auto_create_table: true
19
- template_table: your_table_name
20
- payload_column: payload
21
- skip_load: true # for debug
@@ -1 +0,0 @@
1
- example/config_replace.yml
@@ -1,33 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: service_account
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- with_rehearsal: true
31
- rehearsal_counts: 1
32
- skip_load: true # for debug
33
- compression: GZIP