embulk-output-bigquery 0.3.0.pre2 → 0.3.0.pre3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/embulk-output-bigquery.gemspec +1 -1
- data/example/config_max_threads.yml +34 -0
- data/example/config_min_ouput_tasks.yml +34 -0
- data/example/example2_1.csv +17 -0
- data/example/example2_2.csv +17 -0
- data/example/example4_1.csv +17 -0
- data/example/example4_2.csv +17 -0
- data/example/example4_3.csv +17 -0
- data/example/example4_4.csv +17 -0
- data/lib/embulk/output/bigquery.rb +8 -9
- data/lib/embulk/output/bigquery/bigquery_client.rb +1 -1
- data/lib/embulk/output/bigquery/file_writer.rb +39 -13
- data/test/test_configure.rb +1 -1
- data/test/test_example.rb +1 -0
- data/test/test_file_writer.rb +33 -23
- metadata +10 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1dbd4398ccaccaf5b4d2d812dcbf64d2610c41c
|
4
|
+
data.tar.gz: 3b1ad06791c8b6b65139dfaf7d82e7e1f253c35d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5307cce2d9983b7c2710180f28e0e19515d6895dcd3b5d6e0c48503c57b5e555332831ed328a21daf9743e7688d7d3a51e1f81e4436370311e8defdd1dbc6b22
|
7
|
+
data.tar.gz: eeae7ee0e52c2e218478d5d58d3a2eeb76fedb02e5f6d1ba85a393069dbf1c455028658593b3dcedeff7f6dced4de9c31185e33d2fde824bfc9d4b3494acbc09
|
data/README.md
CHANGED
@@ -65,7 +65,7 @@ Options for intermediate local files
|
|
65
65
|
| name | type | required? | default | description |
|
66
66
|
|:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
|
67
67
|
| path_prefix | string | optional | | Path prefix of local files such as "/tmp/prefix_". Default randomly generates with [tempfile](http://ruby-doc.org/stdlib-2.2.3/libdoc/tempfile/rdoc/Tempfile.html) |
|
68
|
-
| sequence_format | string | optional | .%d.%
|
68
|
+
| sequence_format | string | optional | .%d.%d | Sequence format for pid, thread id |
|
69
69
|
| file_ext | string | optional | | The file extension of local files such as ".csv.gz" ".json.gz". Default automatically generates from `source_format` and `compression`|
|
70
70
|
| skip_file_generation | boolean | optional | | Load already generated local files into BigQuery if available. Specify correct path_prefix and file_ext. |
|
71
71
|
| delete_from_local_when_job_end | boolean | optional | false | If set to true, delete glocal file when job is end |
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-bigquery"
|
3
|
-
spec.version = "0.3.0.pre2"
|
3
|
+
spec.version = "0.3.0.pre3"
|
4
4
|
spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
|
5
5
|
spec.summary = "Google BigQuery output plugin for Embulk"
|
6
6
|
spec.description = "Embulk plugin that insert records to Google BigQuery."
|
@@ -0,0 +1,34 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/example4_
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
charset: UTF-8
|
7
|
+
newline: CRLF
|
8
|
+
null_string: 'NULL'
|
9
|
+
skip_header_lines: 1
|
10
|
+
comment_line_marker: '#'
|
11
|
+
columns:
|
12
|
+
- {name: date, type: string}
|
13
|
+
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
14
|
+
- {name: "null", type: string}
|
15
|
+
- {name: long, type: long}
|
16
|
+
- {name: string, type: string}
|
17
|
+
- {name: double, type: double}
|
18
|
+
- {name: boolean, type: boolean}
|
19
|
+
out:
|
20
|
+
type: bigquery
|
21
|
+
mode: replace
|
22
|
+
auth_method: json_key
|
23
|
+
json_keyfile: example/your-project-000.json
|
24
|
+
dataset: your_dataset_name
|
25
|
+
table: your_table_name
|
26
|
+
source_format: NEWLINE_DELIMITED_JSON
|
27
|
+
compression: NONE
|
28
|
+
auto_create_dataset: true
|
29
|
+
auto_create_table: true
|
30
|
+
schema_file: example/schema.json
|
31
|
+
exec:
|
32
|
+
type: local
|
33
|
+
min_output_tasks: 2
|
34
|
+
max_threads: 2
|
@@ -0,0 +1,34 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/example2_
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
charset: UTF-8
|
7
|
+
newline: CRLF
|
8
|
+
null_string: 'NULL'
|
9
|
+
skip_header_lines: 1
|
10
|
+
comment_line_marker: '#'
|
11
|
+
columns:
|
12
|
+
- {name: date, type: string}
|
13
|
+
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
14
|
+
- {name: "null", type: string}
|
15
|
+
- {name: long, type: long}
|
16
|
+
- {name: string, type: string}
|
17
|
+
- {name: double, type: double}
|
18
|
+
- {name: boolean, type: boolean}
|
19
|
+
out:
|
20
|
+
type: bigquery
|
21
|
+
mode: replace
|
22
|
+
auth_method: json_key
|
23
|
+
json_keyfile: example/your-project-000.json
|
24
|
+
dataset: your_dataset_name
|
25
|
+
table: your_table_name
|
26
|
+
source_format: NEWLINE_DELIMITED_JSON
|
27
|
+
compression: GZIP
|
28
|
+
auto_create_dataset: true
|
29
|
+
auto_create_table: true
|
30
|
+
schema_file: example/schema.json
|
31
|
+
exec:
|
32
|
+
type: local
|
33
|
+
min_output_tasks: 8
|
34
|
+
max_threads: 4
|
@@ -0,0 +1,17 @@
|
|
1
|
+
date,timestamp,null,long,string,double,boolean
|
2
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
|
3
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
|
4
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
|
5
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
|
6
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
|
7
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
|
8
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
|
9
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
|
10
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
|
11
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
|
12
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
|
13
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
|
14
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
|
15
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
|
16
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
|
17
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
|
@@ -0,0 +1,17 @@
|
|
1
|
+
date,timestamp,null,long,string,double,boolean
|
2
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
|
3
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
|
4
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
|
5
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
|
6
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
|
7
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
|
8
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
|
9
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
|
10
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
|
11
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
|
12
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
|
13
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
|
14
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
|
15
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
|
16
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
|
17
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
|
@@ -0,0 +1,17 @@
|
|
1
|
+
date,timestamp,null,long,string,double,boolean
|
2
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
|
3
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
|
4
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
|
5
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
|
6
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
|
7
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
|
8
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
|
9
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
|
10
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
|
11
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
|
12
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
|
13
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
|
14
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
|
15
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
|
16
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
|
17
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
|
@@ -0,0 +1,17 @@
|
|
1
|
+
date,timestamp,null,long,string,double,boolean
|
2
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
|
3
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
|
4
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
|
5
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
|
6
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
|
7
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
|
8
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
|
9
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
|
10
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
|
11
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
|
12
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
|
13
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
|
14
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
|
15
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
|
16
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
|
17
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
|
@@ -0,0 +1,17 @@
|
|
1
|
+
date,timestamp,null,long,string,double,boolean
|
2
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
|
3
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
|
4
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
|
5
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
|
6
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
|
7
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
|
8
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
|
9
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
|
10
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
|
11
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
|
12
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
|
13
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
|
14
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
|
15
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
|
16
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
|
17
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
|
@@ -0,0 +1,17 @@
|
|
1
|
+
date,timestamp,null,long,string,double,boolean
|
2
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
|
3
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
|
4
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
|
5
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
|
6
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
|
7
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
|
8
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
|
9
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
|
10
|
+
2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
|
11
|
+
2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
|
12
|
+
2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
|
13
|
+
2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
|
14
|
+
2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
|
15
|
+
2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
|
16
|
+
2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
|
17
|
+
2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
|
@@ -63,7 +63,7 @@ module Embulk
|
|
63
63
|
'application_name' => config.param('application_name', :string, :default => 'Embulk BigQuery plugin'),
|
64
64
|
|
65
65
|
'path_prefix' => config.param('path_prefix', :string, :default => nil),
|
66
|
-
'sequence_format' => config.param('sequence_format', :string, :default => '.%d.%
|
66
|
+
'sequence_format' => config.param('sequence_format', :string, :default => '.%d.%d'),
|
67
67
|
'file_ext' => config.param('file_ext', :string, :default => nil),
|
68
68
|
'skip_file_generation' => config.param('skip_file_generation', :bool, :default => false),
|
69
69
|
'compression' => config.param('compression', :string, :default => 'NONE'),
|
@@ -273,11 +273,12 @@ module Embulk
|
|
273
273
|
path_pattern = "#{task['path_prefix']}*#{task['file_ext']}"
|
274
274
|
Embulk.logger.info { "embulk-output-bigquery: Skip file generation. Get paths from `#{path_pattern}`" }
|
275
275
|
paths = Dir.glob(path_pattern)
|
276
|
-
task_reports = paths.map {|path| { 'path' => path, 'num_input_rows' => 0 } }
|
276
|
+
task_reports = paths.map {|path| { 'num_input_rows' => 0 } }
|
277
277
|
else
|
278
278
|
task_reports = yield(task) # generates local files
|
279
279
|
Embulk.logger.info { "embulk-output-bigquery: task_reports: #{task_reports.to_json}" }
|
280
|
-
paths = task_reports.map {|report| report['path'] }
|
280
|
+
paths = FileWriter.paths
|
281
|
+
FileWriter.ios.each {|io| io.close rescue nil }
|
281
282
|
end
|
282
283
|
|
283
284
|
if task['skip_load'] # only for debug
|
@@ -294,11 +295,9 @@ module Embulk
|
|
294
295
|
|
295
296
|
if task['temp_table']
|
296
297
|
if task['mode'] == 'append'
|
297
|
-
bigquery.copy(task['temp_table'], task['table'],
|
298
|
-
write_disposition: 'WRITE_APPEND')
|
298
|
+
bigquery.copy(task['temp_table'], task['table'], write_disposition: 'WRITE_APPEND')
|
299
299
|
else # replace or replace_backup
|
300
|
-
bigquery.copy(task['temp_table'], task['table'],
|
301
|
-
write_disposition: 'WRITE_TRUNCATE')
|
300
|
+
bigquery.copy(task['temp_table'], task['table'], write_disposition: 'WRITE_TRUNCATE')
|
302
301
|
end
|
303
302
|
end
|
304
303
|
end
|
@@ -351,11 +350,11 @@ module Embulk
|
|
351
350
|
def add(page)
|
352
351
|
if task['with_rehearsal'] and @index == 0 and !@rehearsaled
|
353
352
|
page = page.to_a # to avoid https://github.com/embulk/embulk/issues/403
|
354
|
-
if @num_rows
|
353
|
+
if @num_rows >= task['rehearsal_counts']
|
355
354
|
Embulk.logger.info { "embulk-output-bigquery: Rehearsal started" }
|
356
355
|
begin
|
357
356
|
@bigquery.create_table(task['rehearsal_table'])
|
358
|
-
@bigquery.load(@path, task['rehearsal_table'])
|
357
|
+
@bigquery.load(FileWriter.paths.first, task['rehearsal_table'])
|
359
358
|
ensure
|
360
359
|
@bigquery.delete_table(task['rehearsal_table'])
|
361
360
|
end
|
@@ -245,7 +245,7 @@ module Embulk
|
|
245
245
|
status = _response.status.state
|
246
246
|
if status == "DONE"
|
247
247
|
Embulk.logger.info {
|
248
|
-
"embulk-output-bigquery: #{kind} job completed
|
248
|
+
"embulk-output-bigquery: #{kind} job completed... " \
|
249
249
|
"job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
|
250
250
|
}
|
251
251
|
break
|
@@ -7,8 +7,6 @@ module Embulk
|
|
7
7
|
module Output
|
8
8
|
class Bigquery < OutputPlugin
|
9
9
|
class FileWriter
|
10
|
-
attr_reader :path
|
11
|
-
|
12
10
|
def initialize(task, schema, index, converters = nil)
|
13
11
|
@task = task
|
14
12
|
@schema = schema
|
@@ -30,21 +28,49 @@ module Embulk
|
|
30
28
|
@formatter_proc = self.method(:to_jsonl)
|
31
29
|
end
|
32
30
|
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def self.reset_ios
|
34
|
+
@ios = Set.new
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.ios
|
38
|
+
@ios ||= Set.new
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.paths
|
42
|
+
ios.map {|io| io.path }
|
43
|
+
end
|
44
|
+
|
45
|
+
THREAD_LOCAL_IO_KEY = :embulk_output_bigquery_file_writer_io
|
46
|
+
|
47
|
+
# Create one io object for one output thread, that is, share among tasks
|
48
|
+
# Close theses shared io objects in transaction
|
49
|
+
#
|
50
|
+
# Thread IO must be created at #add because threads in #initialize or #commit
|
51
|
+
# are different (called from non-output threads). Note also that #add of the
|
52
|
+
# same instance would be called in different output threads
|
53
|
+
def thread_io
|
54
|
+
return Thread.current[THREAD_LOCAL_IO_KEY] if Thread.current[THREAD_LOCAL_IO_KEY]
|
33
55
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
56
|
+
path = sprintf(
|
57
|
+
"#{@task['path_prefix']}#{@task['sequence_format']}#{@task['file_ext']}",
|
58
|
+
Process.pid, Thread.current.object_id
|
59
|
+
)
|
60
|
+
if File.exist?(path)
|
61
|
+
Embulk.logger.warn { "embulk-output-bigquery: unlink already existing #{path}" }
|
62
|
+
File.unlink(path) rescue nil
|
39
63
|
end
|
40
|
-
|
64
|
+
Embulk.logger.info { "embulk-output-bigquery: create #{path}" }
|
65
|
+
file_io = File.open(path, 'w')
|
41
66
|
|
42
67
|
case @task['compression'].downcase
|
43
68
|
when 'gzip'
|
44
|
-
|
69
|
+
io = Zlib::GzipWriter.new(file_io)
|
45
70
|
else
|
46
|
-
|
71
|
+
io = file_io
|
47
72
|
end
|
73
|
+
Thread.current[THREAD_LOCAL_IO_KEY] = io
|
48
74
|
end
|
49
75
|
|
50
76
|
def to_payload(record)
|
@@ -72,13 +98,15 @@ module Embulk
|
|
72
98
|
end
|
73
99
|
|
74
100
|
def add(page)
|
101
|
+
io = thread_io
|
102
|
+
self.class.ios.add(io)
|
75
103
|
# I once tried to split IO writing into another IO thread using SizedQueue
|
76
104
|
# However, it resulted in worse performance, so I removed the codes.
|
77
105
|
page.each do |record|
|
78
106
|
Embulk.logger.trace { "embulk-output-bigquery: record #{record}" }
|
79
107
|
formatted_record = @formatter_proc.call(record)
|
80
108
|
Embulk.logger.trace { "embulk-output-bigquery: formatted_record #{formatted_record.chomp}" }
|
81
|
-
|
109
|
+
io.write formatted_record
|
82
110
|
@num_input_rows += 1
|
83
111
|
end
|
84
112
|
now = Time.now
|
@@ -91,10 +119,8 @@ module Embulk
|
|
91
119
|
end
|
92
120
|
|
93
121
|
def commit
|
94
|
-
@io.close rescue nil
|
95
122
|
task_report = {
|
96
123
|
'num_input_rows' => @num_input_rows,
|
97
|
-
'path' => @path,
|
98
124
|
}
|
99
125
|
end
|
100
126
|
end
|
data/test/test_configure.rb
CHANGED
@@ -74,7 +74,7 @@ module Embulk
|
|
74
74
|
assert_equal 5, task['retries']
|
75
75
|
assert_equal "Embulk BigQuery plugin", task['application_name']
|
76
76
|
# assert_equal "/tmp/embulk_output_bigquery_20160228-27184-pubcn0", task['path_prefix']
|
77
|
-
assert_equal ".%d.%
|
77
|
+
assert_equal ".%d.%d", task['sequence_format']
|
78
78
|
assert_equal ".csv", task['file_ext']
|
79
79
|
assert_equal false, task['skip_file_generation']
|
80
80
|
assert_equal "NONE", task['compression']
|
data/test/test_example.rb
CHANGED
@@ -20,6 +20,7 @@ if ENV['CONNECT']
|
|
20
20
|
files = files.reject {|file| File.symlink?(file) }
|
21
21
|
# files.shift
|
22
22
|
files.each do |config_path|
|
23
|
+
next if File.basename(config_path) == 'config_expose_errors.yml'
|
23
24
|
define_method(:"test_#{File.basename(config_path, ".yml")}") do
|
24
25
|
success = Bundler.with_clean_env do
|
25
26
|
cmd = "#{embulk_path} run -X page_size=1 -b . -l trace #{config_path}"
|
data/test/test_file_writer.rb
CHANGED
@@ -16,6 +16,11 @@ module Embulk
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
+
def setup
|
20
|
+
Thread.current[FileWriter::THREAD_LOCAL_IO_KEY] = nil
|
21
|
+
FileWriter.reset_ios
|
22
|
+
end
|
23
|
+
|
19
24
|
def default_task
|
20
25
|
{
|
21
26
|
'compression' => 'GZIP',
|
@@ -42,19 +47,30 @@ module Embulk
|
|
42
47
|
@converters ||= ValueConverterFactory.create_converters(default_task, schema)
|
43
48
|
end
|
44
49
|
|
50
|
+
def record
|
51
|
+
[true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
|
52
|
+
end
|
53
|
+
|
54
|
+
def page
|
55
|
+
[record]
|
56
|
+
end
|
57
|
+
|
45
58
|
sub_test_case "path" do
|
46
59
|
def test_path
|
47
60
|
task = default_task.merge('path_prefix' => 'tmp/foo', 'sequence_format' => '', 'file_ext' => '.1')
|
48
61
|
file_writer = FileWriter.new(task, schema, 0, converters)
|
49
|
-
|
62
|
+
|
63
|
+
begin
|
64
|
+
file_writer.add(page)
|
65
|
+
ensure
|
66
|
+
io.close rescue nil
|
67
|
+
end
|
68
|
+
path = FileWriter.paths.first
|
69
|
+
assert_equal 'tmp/foo.1', path
|
50
70
|
end
|
51
71
|
end
|
52
72
|
|
53
73
|
sub_test_case "formatter" do
|
54
|
-
def record
|
55
|
-
[true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
|
56
|
-
end
|
57
|
-
|
58
74
|
def test_payload_column_index
|
59
75
|
task = default_task.merge('payload_column_index' => 0)
|
60
76
|
file_writer = FileWriter.new(task, schema, 0, converters)
|
@@ -86,42 +102,36 @@ module Embulk
|
|
86
102
|
end
|
87
103
|
|
88
104
|
sub_test_case "compression" do
|
89
|
-
def record
|
90
|
-
[true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
|
91
|
-
end
|
92
|
-
|
93
|
-
def page
|
94
|
-
[record]
|
95
|
-
end
|
96
|
-
|
97
105
|
def test_gzip
|
98
106
|
task = default_task.merge('compression' => 'GZIP')
|
99
107
|
file_writer = FileWriter.new(task, schema, 0, converters)
|
100
|
-
io = file_writer.instance_variable_get(:@io)
|
101
|
-
assert_equal Zlib::GzipWriter, io.class
|
102
108
|
|
103
109
|
begin
|
104
110
|
file_writer.add(page)
|
111
|
+
io = FileWriter.ios.first
|
112
|
+
assert_equal Zlib::GzipWriter, io.class
|
105
113
|
ensure
|
106
|
-
|
114
|
+
io.close rescue nil
|
107
115
|
end
|
108
|
-
|
109
|
-
|
116
|
+
path = FileWriter.paths.first
|
117
|
+
assert_true File.exist?(path)
|
118
|
+
assert_nothing_raised { Zlib::GzipReader.open(path) {|gz| } }
|
110
119
|
end
|
111
120
|
|
112
121
|
def test_uncompressed
|
113
122
|
task = default_task.merge('compression' => 'NONE')
|
114
123
|
file_writer = FileWriter.new(task, schema, 0, converters)
|
115
|
-
io = file_writer.instance_variable_get(:@io)
|
116
|
-
assert_equal File, io.class
|
117
124
|
|
118
125
|
begin
|
119
126
|
file_writer.add(page)
|
127
|
+
io = FileWriter.ios.first
|
128
|
+
assert_equal File, io.class
|
120
129
|
ensure
|
121
|
-
|
130
|
+
io.close rescue nil
|
122
131
|
end
|
123
|
-
|
124
|
-
|
132
|
+
path = FileWriter.paths.first
|
133
|
+
assert_true File.exist?(path)
|
134
|
+
assert_raise { Zlib::GzipReader.open(path) {|gz| } }
|
125
135
|
end
|
126
136
|
end
|
127
137
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0.pre2
|
4
|
+
version: 0.3.0.pre3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-03-
|
12
|
+
date: 2016-03-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: google-api-client
|
@@ -104,6 +104,8 @@ files:
|
|
104
104
|
- example/config_guess_with_column_options.yml
|
105
105
|
- example/config_gzip.yml
|
106
106
|
- example/config_jsonl.yml
|
107
|
+
- example/config_max_threads.yml
|
108
|
+
- example/config_min_ouput_tasks.yml
|
107
109
|
- example/config_mode_append.yml
|
108
110
|
- example/config_mode_append_direct.yml
|
109
111
|
- example/config_payload_column.yml
|
@@ -119,6 +121,12 @@ files:
|
|
119
121
|
- example/example.csv
|
120
122
|
- example/example.jsonl
|
121
123
|
- example/example.yml
|
124
|
+
- example/example2_1.csv
|
125
|
+
- example/example2_2.csv
|
126
|
+
- example/example4_1.csv
|
127
|
+
- example/example4_2.csv
|
128
|
+
- example/example4_3.csv
|
129
|
+
- example/example4_4.csv
|
122
130
|
- example/json_key.json
|
123
131
|
- example/nested_example.jsonl
|
124
132
|
- example/schema.json
|