embulk-output-bigquery 0.5.0 → 0.6.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +29 -4
- data/README.md +71 -42
- data/embulk-output-bigquery.gemspec +10 -3
- data/lib/embulk/output/bigquery.rb +11 -20
- data/lib/embulk/output/bigquery/auth.rb +35 -0
- data/lib/embulk/output/bigquery/google_client.rb +3 -34
- data/lib/embulk/output/bigquery/value_converter_factory.rb +31 -0
- data/test/test_bigquery_client.rb +1 -5
- data/test/test_configure.rb +4 -12
- data/test/test_helper.rb +7 -1
- data/test/test_transaction.rb +5 -6
- data/test/test_value_converter_factory.rb +86 -0
- metadata +29 -51
- data/example/config_append_direct_schema_update_options.yml +0 -31
- data/example/config_client_options.yml +0 -33
- data/example/config_csv.yml +0 -30
- data/example/config_delete_in_advance.yml +0 -29
- data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
- data/example/config_delete_in_advance_partitioned_table.yml +0 -33
- data/example/config_expose_errors.yml +0 -30
- data/example/config_gcs.yml +0 -32
- data/example/config_guess_from_embulk_schema.yml +0 -29
- data/example/config_guess_with_column_options.yml +0 -40
- data/example/config_gzip.yml +0 -1
- data/example/config_jsonl.yml +0 -1
- data/example/config_max_threads.yml +0 -34
- data/example/config_min_ouput_tasks.yml +0 -34
- data/example/config_mode_append.yml +0 -30
- data/example/config_mode_append_direct.yml +0 -30
- data/example/config_nested_record.yml +0 -1
- data/example/config_payload_column.yml +0 -20
- data/example/config_payload_column_index.yml +0 -20
- data/example/config_progress_log_interval.yml +0 -31
- data/example/config_replace.yml +0 -30
- data/example/config_replace_backup.yml +0 -32
- data/example/config_replace_backup_field_partitioned_table.yml +0 -34
- data/example/config_replace_backup_partitioned_table.yml +0 -34
- data/example/config_replace_field_partitioned_table.yml +0 -33
- data/example/config_replace_partitioned_table.yml +0 -33
- data/example/config_replace_schema_update_options.yml +0 -33
- data/example/config_skip_file_generation.yml +0 -32
- data/example/config_table_strftime.yml +0 -30
- data/example/config_template_table.yml +0 -21
- data/example/config_uncompressed.yml +0 -1
- data/example/config_with_rehearsal.yml +0 -33
- data/example/example.csv +0 -17
- data/example/example.yml +0 -1
- data/example/example2_1.csv +0 -1
- data/example/example2_2.csv +0 -1
- data/example/example4_1.csv +0 -1
- data/example/example4_2.csv +0 -1
- data/example/example4_3.csv +0 -1
- data/example/example4_4.csv +0 -1
- data/example/json_key.json +0 -12
- data/example/nested_example.jsonl +0 -16
- data/example/schema.json +0 -30
- data/example/schema_expose_errors.json +0 -30
data/example/config_csv.yml
DELETED
@@ -1,30 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: date, type: string}
|
13
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
14
|
-
- {name: "null", type: string}
|
15
|
-
- {name: long, type: long}
|
16
|
-
- {name: string, type: string}
|
17
|
-
- {name: double, type: double}
|
18
|
-
- {name: boolean, type: boolean}
|
19
|
-
out:
|
20
|
-
type: bigquery
|
21
|
-
mode: replace
|
22
|
-
auth_method: json_key
|
23
|
-
json_keyfile: example/your-project-000.json
|
24
|
-
dataset: your_dataset_name
|
25
|
-
table: your_table_name
|
26
|
-
source_format: CSV
|
27
|
-
compression: GZIP
|
28
|
-
auto_create_dataset: true
|
29
|
-
auto_create_table: true
|
30
|
-
schema_file: example/schema.json
|
@@ -1,29 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: date, type: string}
|
13
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
14
|
-
- {name: "null", type: string}
|
15
|
-
- {name: long, type: long}
|
16
|
-
- {name: string, type: string}
|
17
|
-
- {name: double, type: double}
|
18
|
-
- {name: boolean, type: boolean}
|
19
|
-
out:
|
20
|
-
type: bigquery
|
21
|
-
mode: delete_in_advance
|
22
|
-
auth_method: json_key
|
23
|
-
json_keyfile: example/your-project-000.json
|
24
|
-
dataset: your_dataset_name
|
25
|
-
table: your_table_name
|
26
|
-
source_format: NEWLINE_DELIMITED_JSON
|
27
|
-
auto_create_dataset: true
|
28
|
-
auto_create_table: true
|
29
|
-
schema_file: example/schema.json
|
@@ -1,33 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: date, type: string}
|
13
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
14
|
-
- {name: "null", type: string}
|
15
|
-
- {name: long, type: long}
|
16
|
-
- {name: string, type: string}
|
17
|
-
- {name: double, type: double}
|
18
|
-
- {name: boolean, type: boolean}
|
19
|
-
out:
|
20
|
-
type: bigquery
|
21
|
-
mode: delete_in_advance
|
22
|
-
auth_method: json_key
|
23
|
-
json_keyfile: example/your-project-000.json
|
24
|
-
dataset: your_dataset_name
|
25
|
-
table: your_field_partitioned_table_name
|
26
|
-
source_format: NEWLINE_DELIMITED_JSON
|
27
|
-
compression: NONE
|
28
|
-
auto_create_dataset: true
|
29
|
-
auto_create_table: true
|
30
|
-
schema_file: example/schema.json
|
31
|
-
time_partitioning:
|
32
|
-
type: 'DAY'
|
33
|
-
field: timestamp
|
@@ -1,33 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: date, type: string}
|
13
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
14
|
-
- {name: "null", type: string}
|
15
|
-
- {name: long, type: long}
|
16
|
-
- {name: string, type: string}
|
17
|
-
- {name: double, type: double}
|
18
|
-
- {name: boolean, type: boolean}
|
19
|
-
out:
|
20
|
-
type: bigquery
|
21
|
-
mode: delete_in_advance
|
22
|
-
auth_method: json_key
|
23
|
-
json_keyfile: example/your-project-000.json
|
24
|
-
dataset: your_dataset_name
|
25
|
-
table: your_partitioned_table_name$20160929
|
26
|
-
source_format: NEWLINE_DELIMITED_JSON
|
27
|
-
compression: NONE
|
28
|
-
auto_create_dataset: true
|
29
|
-
auto_create_table: true
|
30
|
-
schema_file: example/schema.json
|
31
|
-
time_partitioning:
|
32
|
-
type: 'DAY'
|
33
|
-
expiration_ms: 100
|
@@ -1,30 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: date, type: string}
|
13
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
14
|
-
- {name: "null", type: string}
|
15
|
-
- {name: long, type: long}
|
16
|
-
- {name: string, type: string}
|
17
|
-
- {name: double, type: double}
|
18
|
-
- {name: boolean, type: boolean}
|
19
|
-
out:
|
20
|
-
type: bigquery
|
21
|
-
mode: replace
|
22
|
-
auth_method: json_key
|
23
|
-
json_keyfile: example/your-project-000.json
|
24
|
-
dataset: your_dataset_name
|
25
|
-
table: your_table_name
|
26
|
-
source_format: NEWLINE_DELIMITED_JSON
|
27
|
-
compression: NONE
|
28
|
-
auto_create_dataset: true
|
29
|
-
auto_create_table: true
|
30
|
-
schema_file: example/schema_expose_errors.json
|
data/example/config_gcs.yml
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: date, type: string}
|
13
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
14
|
-
- {name: "null", type: string}
|
15
|
-
- {name: long, type: long}
|
16
|
-
- {name: string, type: string}
|
17
|
-
- {name: double, type: double}
|
18
|
-
- {name: boolean, type: boolean}
|
19
|
-
out:
|
20
|
-
type: bigquery
|
21
|
-
mode: replace
|
22
|
-
auth_method: json_key
|
23
|
-
json_keyfile: example/your-project-000.json
|
24
|
-
dataset: your_dataset_name
|
25
|
-
table: your_table_name
|
26
|
-
source_format: NEWLINE_DELIMITED_JSON
|
27
|
-
compression: GZIP
|
28
|
-
auto_create_dataset: true
|
29
|
-
auto_create_table: true
|
30
|
-
schema_file: example/schema.json
|
31
|
-
gcs_bucket: your_bucket_name
|
32
|
-
auto_create_gcs_bucket: true
|
@@ -1,29 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: date, type: string}
|
13
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
14
|
-
- {name: "null", type: string}
|
15
|
-
- {name: long, type: long}
|
16
|
-
- {name: string, type: string}
|
17
|
-
- {name: double, type: double}
|
18
|
-
- {name: boolean, type: boolean}
|
19
|
-
out:
|
20
|
-
type: bigquery
|
21
|
-
mode: replace
|
22
|
-
auth_method: json_key
|
23
|
-
json_keyfile: example/your-project-000.json
|
24
|
-
dataset: your_dataset_name
|
25
|
-
table: your_table_name
|
26
|
-
compression: GZIP
|
27
|
-
source_format: NEWLINE_DELIMITED_JSON
|
28
|
-
auto_create_dataset: true
|
29
|
-
auto_create_table: true
|
@@ -1,40 +0,0 @@
|
|
1
|
-
# embulk gem install embulk-parser-jsonl
|
2
|
-
in:
|
3
|
-
type: file
|
4
|
-
path_prefix: example/nested_example.jsonl
|
5
|
-
parser:
|
6
|
-
type: jsonl
|
7
|
-
columns:
|
8
|
-
- {name: date, type: string}
|
9
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
10
|
-
- {name: "null", type: string}
|
11
|
-
- {name: long, type: long}
|
12
|
-
- {name: string, type: string}
|
13
|
-
- {name: double, type: double}
|
14
|
-
- {name: json, type: json}
|
15
|
-
- {name: boolean, type: boolean}
|
16
|
-
out:
|
17
|
-
type: bigquery
|
18
|
-
mode: replace
|
19
|
-
auth_method: json_key
|
20
|
-
json_keyfile: example/your-project-000.json
|
21
|
-
dataset: your_dataset_name
|
22
|
-
table: your_table_name
|
23
|
-
compression: GZIP
|
24
|
-
source_format: NEWLINE_DELIMITED_JSON
|
25
|
-
auto_create_dataset: true
|
26
|
-
auto_create_table: true
|
27
|
-
column_options:
|
28
|
-
- {name: date, type: TIMESTAMP, timestamp_format: "%Y-%m-%d", timezone: "+09:00"}
|
29
|
-
- {name: timestamp, type: STRING, timestamp_format: "%Y-%m-%d", timezone: "+09:00"}
|
30
|
-
- {name: long, type: STRING}
|
31
|
-
- {name: string, type: STRING}
|
32
|
-
- {name: double, type: STRING}
|
33
|
-
- {name: boolean, type: STRING}
|
34
|
-
- name: json
|
35
|
-
type: RECORD
|
36
|
-
fields:
|
37
|
-
- {name: k1, type: STRING}
|
38
|
-
- {name: k2, type: STRING}
|
39
|
-
# 2015-07-13
|
40
|
-
# 2015-07-12 15:00:00
|
data/example/config_gzip.yml
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
example/config_csv.yml
|
data/example/config_jsonl.yml
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
example/config_replace.yml
|
@@ -1,34 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example4_
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: date, type: string}
|
13
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
14
|
-
- {name: "null", type: string}
|
15
|
-
- {name: long, type: long}
|
16
|
-
- {name: string, type: string}
|
17
|
-
- {name: double, type: double}
|
18
|
-
- {name: boolean, type: boolean}
|
19
|
-
out:
|
20
|
-
type: bigquery
|
21
|
-
mode: replace
|
22
|
-
auth_method: json_key
|
23
|
-
json_keyfile: example/your-project-000.json
|
24
|
-
dataset: your_dataset_name
|
25
|
-
table: your_table_name
|
26
|
-
source_format: NEWLINE_DELIMITED_JSON
|
27
|
-
compression: NONE
|
28
|
-
auto_create_dataset: true
|
29
|
-
auto_create_table: true
|
30
|
-
schema_file: example/schema.json
|
31
|
-
exec:
|
32
|
-
type: local
|
33
|
-
min_output_tasks: 2
|
34
|
-
max_threads: 2
|
@@ -1,34 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example2_
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
newline: CRLF
|
8
|
-
null_string: 'NULL'
|
9
|
-
skip_header_lines: 1
|
10
|
-
comment_line_marker: '#'
|
11
|
-
columns:
|
12
|
-
- {name: date, type: string}
|
13
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
14
|
-
- {name: "null", type: string}
|
15
|
-
- {name: long, type: long}
|
16
|
-
- {name: string, type: string}
|
17
|
-
- {name: double, type: double}
|
18
|
-
- {name: boolean, type: boolean}
|
19
|
-
out:
|
20
|
-
type: bigquery
|
21
|
-
mode: replace
|
22
|
-
auth_method: json_key
|
23
|
-
json_keyfile: example/your-project-000.json
|
24
|
-
dataset: your_dataset_name
|
25
|
-
table: your_table_name
|
26
|
-
source_format: NEWLINE_DELIMITED_JSON
|
27
|
-
compression: GZIP
|
28
|
-
auto_create_dataset: true
|
29
|
-
auto_create_table: true
|
30
|
-
schema_file: example/schema.json
|
31
|
-
exec:
|
32
|
-
type: local
|
33
|
-
min_output_tasks: 8
|
34
|
-
max_threads: 4
|
@@ -1,30 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
null_string: 'NULL'
|
8
|
-
skip_header_lines: 1
|
9
|
-
comment_line_marker: '#'
|
10
|
-
columns:
|
11
|
-
- {name: date, type: string}
|
12
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
13
|
-
- {name: "null", type: string}
|
14
|
-
- {name: long, type: long}
|
15
|
-
- {name: string, type: string}
|
16
|
-
- {name: double, type: double}
|
17
|
-
- {name: boolean, type: boolean}
|
18
|
-
out:
|
19
|
-
type: bigquery
|
20
|
-
mode: append
|
21
|
-
auth_method: json_key
|
22
|
-
json_keyfile: example/your-project-000.json
|
23
|
-
dataset: your_dataset_name
|
24
|
-
table: your_table_name
|
25
|
-
compression: GZIP
|
26
|
-
source_format: CSV
|
27
|
-
auto_create_dataset: true
|
28
|
-
auto_create_table: true
|
29
|
-
schema_file: example/schema.json
|
30
|
-
delete_from_local_when_job_end: false
|
@@ -1,30 +0,0 @@
|
|
1
|
-
in:
|
2
|
-
type: file
|
3
|
-
path_prefix: example/example.csv
|
4
|
-
parser:
|
5
|
-
type: csv
|
6
|
-
charset: UTF-8
|
7
|
-
null_string: 'NULL'
|
8
|
-
skip_header_lines: 1
|
9
|
-
comment_line_marker: '#'
|
10
|
-
columns:
|
11
|
-
- {name: date, type: string}
|
12
|
-
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
13
|
-
- {name: "null", type: string}
|
14
|
-
- {name: long, type: long}
|
15
|
-
- {name: string, type: string}
|
16
|
-
- {name: double, type: double}
|
17
|
-
- {name: boolean, type: boolean}
|
18
|
-
out:
|
19
|
-
type: bigquery
|
20
|
-
mode: append_direct
|
21
|
-
auth_method: json_key
|
22
|
-
json_keyfile: example/your-project-000.json
|
23
|
-
dataset: your_dataset_name
|
24
|
-
table: your_table_name
|
25
|
-
compression: GZIP
|
26
|
-
source_format: CSV
|
27
|
-
auto_create_dataset: true
|
28
|
-
auto_create_table: true
|
29
|
-
schema_file: example/schema.json
|
30
|
-
delete_from_local_when_job_end: false
|
@@ -1 +0,0 @@
|
|
1
|
-
example/config_guess_with_column_options.yml
|
@@ -1,20 +0,0 @@
|
|
1
|
-
# embulk gem install embulk-parser-none
|
2
|
-
in:
|
3
|
-
type: file
|
4
|
-
path_prefix: example/example.jsonl
|
5
|
-
parser:
|
6
|
-
type: none
|
7
|
-
column_name: payload
|
8
|
-
out:
|
9
|
-
type: bigquery
|
10
|
-
mode: replace
|
11
|
-
auth_method: json_key
|
12
|
-
json_keyfile: example/your-project-000.json
|
13
|
-
dataset: your_dataset_name
|
14
|
-
table: your_table_name
|
15
|
-
compression: GZIP
|
16
|
-
source_format: NEWLINE_DELIMITED_JSON
|
17
|
-
auto_create_dataset: true
|
18
|
-
auto_create_table: true
|
19
|
-
schema_file: example/schema.json
|
20
|
-
payload_column: payload
|
@@ -1,20 +0,0 @@
|
|
1
|
-
# embulk gem install embulk-parser-none
|
2
|
-
in:
|
3
|
-
type: file
|
4
|
-
path_prefix: example/example.jsonl
|
5
|
-
parser:
|
6
|
-
type: none
|
7
|
-
column_name: payload
|
8
|
-
out:
|
9
|
-
type: bigquery
|
10
|
-
mode: replace
|
11
|
-
auth_method: json_key
|
12
|
-
json_keyfile: example/your-project-000.json
|
13
|
-
dataset: your_dataset_name
|
14
|
-
table: your_table_name
|
15
|
-
compression: GZIP
|
16
|
-
source_format: NEWLINE_DELIMITED_JSON
|
17
|
-
auto_create_dataset: true
|
18
|
-
auto_create_table: true
|
19
|
-
schema_file: example/schema.json
|
20
|
-
payload_column_index: 0
|