embulk-output-bigquery 0.5.0 → 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +29 -4
  3. data/README.md +71 -42
  4. data/embulk-output-bigquery.gemspec +10 -3
  5. data/lib/embulk/output/bigquery.rb +11 -20
  6. data/lib/embulk/output/bigquery/auth.rb +35 -0
  7. data/lib/embulk/output/bigquery/google_client.rb +3 -34
  8. data/lib/embulk/output/bigquery/value_converter_factory.rb +31 -0
  9. data/test/test_bigquery_client.rb +1 -5
  10. data/test/test_configure.rb +4 -12
  11. data/test/test_helper.rb +7 -1
  12. data/test/test_transaction.rb +5 -6
  13. data/test/test_value_converter_factory.rb +86 -0
  14. metadata +29 -51
  15. data/example/config_append_direct_schema_update_options.yml +0 -31
  16. data/example/config_client_options.yml +0 -33
  17. data/example/config_csv.yml +0 -30
  18. data/example/config_delete_in_advance.yml +0 -29
  19. data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
  20. data/example/config_delete_in_advance_partitioned_table.yml +0 -33
  21. data/example/config_expose_errors.yml +0 -30
  22. data/example/config_gcs.yml +0 -32
  23. data/example/config_guess_from_embulk_schema.yml +0 -29
  24. data/example/config_guess_with_column_options.yml +0 -40
  25. data/example/config_gzip.yml +0 -1
  26. data/example/config_jsonl.yml +0 -1
  27. data/example/config_max_threads.yml +0 -34
  28. data/example/config_min_ouput_tasks.yml +0 -34
  29. data/example/config_mode_append.yml +0 -30
  30. data/example/config_mode_append_direct.yml +0 -30
  31. data/example/config_nested_record.yml +0 -1
  32. data/example/config_payload_column.yml +0 -20
  33. data/example/config_payload_column_index.yml +0 -20
  34. data/example/config_progress_log_interval.yml +0 -31
  35. data/example/config_replace.yml +0 -30
  36. data/example/config_replace_backup.yml +0 -32
  37. data/example/config_replace_backup_field_partitioned_table.yml +0 -34
  38. data/example/config_replace_backup_partitioned_table.yml +0 -34
  39. data/example/config_replace_field_partitioned_table.yml +0 -33
  40. data/example/config_replace_partitioned_table.yml +0 -33
  41. data/example/config_replace_schema_update_options.yml +0 -33
  42. data/example/config_skip_file_generation.yml +0 -32
  43. data/example/config_table_strftime.yml +0 -30
  44. data/example/config_template_table.yml +0 -21
  45. data/example/config_uncompressed.yml +0 -1
  46. data/example/config_with_rehearsal.yml +0 -33
  47. data/example/example.csv +0 -17
  48. data/example/example.yml +0 -1
  49. data/example/example2_1.csv +0 -1
  50. data/example/example2_2.csv +0 -1
  51. data/example/example4_1.csv +0 -1
  52. data/example/example4_2.csv +0 -1
  53. data/example/example4_3.csv +0 -1
  54. data/example/example4_4.csv +0 -1
  55. data/example/json_key.json +0 -12
  56. data/example/nested_example.jsonl +0 -16
  57. data/example/schema.json +0 -30
  58. data/example/schema_expose_errors.json +0 -30
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: CSV
27
- compression: GZIP
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
@@ -1,29 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: delete_in_advance
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
@@ -1,33 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: delete_in_advance
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_field_partitioned_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- time_partitioning:
32
- type: 'DAY'
33
- field: timestamp
@@ -1,33 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: delete_in_advance
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_partitioned_table_name$20160929
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- time_partitioning:
32
- type: 'DAY'
33
- expiration_ms: 100
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema_expose_errors.json
@@ -1,32 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: GZIP
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- gcs_bucket: your_bucket_name
32
- auto_create_gcs_bucket: true
@@ -1,29 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- compression: GZIP
27
- source_format: NEWLINE_DELIMITED_JSON
28
- auto_create_dataset: true
29
- auto_create_table: true
@@ -1,40 +0,0 @@
1
- # embulk gem install embulk-parser-jsonl
2
- in:
3
- type: file
4
- path_prefix: example/nested_example.jsonl
5
- parser:
6
- type: jsonl
7
- columns:
8
- - {name: date, type: string}
9
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
10
- - {name: "null", type: string}
11
- - {name: long, type: long}
12
- - {name: string, type: string}
13
- - {name: double, type: double}
14
- - {name: json, type: json}
15
- - {name: boolean, type: boolean}
16
- out:
17
- type: bigquery
18
- mode: replace
19
- auth_method: json_key
20
- json_keyfile: example/your-project-000.json
21
- dataset: your_dataset_name
22
- table: your_table_name
23
- compression: GZIP
24
- source_format: NEWLINE_DELIMITED_JSON
25
- auto_create_dataset: true
26
- auto_create_table: true
27
- column_options:
28
- - {name: date, type: TIMESTAMP, timestamp_format: "%Y-%m-%d", timezone: "+09:00"}
29
- - {name: timestamp, type: STRING, timestamp_format: "%Y-%m-%d", timezone: "+09:00"}
30
- - {name: long, type: STRING}
31
- - {name: string, type: STRING}
32
- - {name: double, type: STRING}
33
- - {name: boolean, type: STRING}
34
- - name: json
35
- type: RECORD
36
- fields:
37
- - {name: k1, type: STRING}
38
- - {name: k2, type: STRING}
39
- # 2015-07-13
40
- # 2015-07-12 15:00:00
@@ -1 +0,0 @@
1
- example/config_csv.yml
@@ -1 +0,0 @@
1
- example/config_replace.yml
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example4_
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- exec:
32
- type: local
33
- min_output_tasks: 2
34
- max_threads: 2
@@ -1,34 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example2_
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: GZIP
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- exec:
32
- type: local
33
- min_output_tasks: 8
34
- max_threads: 4
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- null_string: 'NULL'
8
- skip_header_lines: 1
9
- comment_line_marker: '#'
10
- columns:
11
- - {name: date, type: string}
12
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
13
- - {name: "null", type: string}
14
- - {name: long, type: long}
15
- - {name: string, type: string}
16
- - {name: double, type: double}
17
- - {name: boolean, type: boolean}
18
- out:
19
- type: bigquery
20
- mode: append
21
- auth_method: json_key
22
- json_keyfile: example/your-project-000.json
23
- dataset: your_dataset_name
24
- table: your_table_name
25
- compression: GZIP
26
- source_format: CSV
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- delete_from_local_when_job_end: false
@@ -1,30 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- null_string: 'NULL'
8
- skip_header_lines: 1
9
- comment_line_marker: '#'
10
- columns:
11
- - {name: date, type: string}
12
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
13
- - {name: "null", type: string}
14
- - {name: long, type: long}
15
- - {name: string, type: string}
16
- - {name: double, type: double}
17
- - {name: boolean, type: boolean}
18
- out:
19
- type: bigquery
20
- mode: append_direct
21
- auth_method: json_key
22
- json_keyfile: example/your-project-000.json
23
- dataset: your_dataset_name
24
- table: your_table_name
25
- compression: GZIP
26
- source_format: CSV
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- delete_from_local_when_job_end: false
@@ -1 +0,0 @@
1
- example/config_guess_with_column_options.yml
@@ -1,20 +0,0 @@
1
- # embulk gem install embulk-parser-none
2
- in:
3
- type: file
4
- path_prefix: example/example.jsonl
5
- parser:
6
- type: none
7
- column_name: payload
8
- out:
9
- type: bigquery
10
- mode: replace
11
- auth_method: json_key
12
- json_keyfile: example/your-project-000.json
13
- dataset: your_dataset_name
14
- table: your_table_name
15
- compression: GZIP
16
- source_format: NEWLINE_DELIMITED_JSON
17
- auto_create_dataset: true
18
- auto_create_table: true
19
- schema_file: example/schema.json
20
- payload_column: payload
@@ -1,20 +0,0 @@
1
- # embulk gem install embulk-parser-none
2
- in:
3
- type: file
4
- path_prefix: example/example.jsonl
5
- parser:
6
- type: none
7
- column_name: payload
8
- out:
9
- type: bigquery
10
- mode: replace
11
- auth_method: json_key
12
- json_keyfile: example/your-project-000.json
13
- dataset: your_dataset_name
14
- table: your_table_name
15
- compression: GZIP
16
- source_format: NEWLINE_DELIMITED_JSON
17
- auto_create_dataset: true
18
- auto_create_table: true
19
- schema_file: example/schema.json
20
- payload_column_index: 0