embulk-output-bigquery 0.3.0.pre2 → 0.3.0.pre3
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/embulk-output-bigquery.gemspec +1 -1
- data/example/config_max_threads.yml +34 -0
- data/example/config_min_ouput_tasks.yml +34 -0
- data/example/example2_1.csv +17 -0
- data/example/example2_2.csv +17 -0
- data/example/example4_1.csv +17 -0
- data/example/example4_2.csv +17 -0
- data/example/example4_3.csv +17 -0
- data/example/example4_4.csv +17 -0
- data/lib/embulk/output/bigquery.rb +8 -9
- data/lib/embulk/output/bigquery/bigquery_client.rb +1 -1
- data/lib/embulk/output/bigquery/file_writer.rb +39 -13
- data/test/test_configure.rb +1 -1
- data/test/test_example.rb +1 -0
- data/test/test_file_writer.rb +33 -23
- metadata +10 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f1dbd4398ccaccaf5b4d2d812dcbf64d2610c41c
+  data.tar.gz: 3b1ad06791c8b6b65139dfaf7d82e7e1f253c35d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5307cce2d9983b7c2710180f28e0e19515d6895dcd3b5d6e0c48503c57b5e555332831ed328a21daf9743e7688d7d3a51e1f81e4436370311e8defdd1dbc6b22
+  data.tar.gz: eeae7ee0e52c2e218478d5d58d3a2eeb76fedb02e5f6d1ba85a393069dbf1c455028658593b3dcedeff7f6dced4de9c31185e33d2fde824bfc9d4b3494acbc09
data/README.md
CHANGED
@@ -65,7 +65,7 @@ Options for intermediate local files
 | name | type | required? | default | description |
 |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
 | path_prefix | string | optional | | Path prefix of local files such as "/tmp/prefix_". Default randomly generates with [tempfile](http://ruby-doc.org/stdlib-2.2.3/libdoc/tempfile/rdoc/Tempfile.html) |
-| sequence_format | string | optional | .%d.%
+| sequence_format | string | optional | .%d.%d | Sequence format for pid, thread id |
 | file_ext | string | optional | | The file extension of local files such as ".csv.gz" ".json.gz". Default automatically generates from `source_format` and `compression`|
 | skip_file_generation | boolean | optional | | Load already generated local files into BigQuery if available. Specify correct path_prefix and file_ext. |
 | delete_from_local_when_job_end | boolean | optional | false | If set to true, delete glocal file when job is end |
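For context on the rows above: the intermediate local file name is composed from `path_prefix`, `sequence_format`, and `file_ext`, and the new default `sequence_format` of `.%d.%d` is filled with the process id and the writer thread's object id (see the FileWriter change later in this diff). A minimal sketch of that composition; the prefix and extension values here are illustrative only, not taken from the gem:

```ruby
# Illustrative only: shows how the defaults compose a local file path.
path_prefix     = "/tmp/prefix_"   # user-supplied, or a random tempfile prefix
sequence_format = ".%d.%d"         # new default: pid, then thread id
file_ext        = ".csv.gz"        # derived from source_format and compression

path = sprintf("#{path_prefix}#{sequence_format}#{file_ext}",
               Process.pid, Thread.current.object_id)
puts path  # e.g. "/tmp/prefix_.12345.70368744177960.csv.gz"
```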
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.3.0.pre2"
+  spec.version = "0.3.0.pre3"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
data/example/config_max_threads.yml
ADDED
@@ -0,0 +1,34 @@
+in:
+  type: file
+  path_prefix: example/example4_
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+exec:
+  type: local
+  min_output_tasks: 2
+  max_threads: 2
data/example/config_min_ouput_tasks.yml
ADDED
@@ -0,0 +1,34 @@
+in:
+  type: file
+  path_prefix: example/example2_
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: GZIP
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+exec:
+  type: local
+  min_output_tasks: 8
+  max_threads: 4
data/example/example2_1.csv
ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false

data/example/example2_2.csv
ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false

data/example/example4_1.csv
ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false

data/example/example4_2.csv
ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false

data/example/example4_3.csv
ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false

data/example/example4_4.csv
ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -63,7 +63,7 @@ module Embulk
         'application_name' => config.param('application_name', :string, :default => 'Embulk BigQuery plugin'),
 
         'path_prefix' => config.param('path_prefix', :string, :default => nil),
-        'sequence_format' => config.param('sequence_format', :string, :default => '.%d.%
+        'sequence_format' => config.param('sequence_format', :string, :default => '.%d.%d'),
         'file_ext' => config.param('file_ext', :string, :default => nil),
         'skip_file_generation' => config.param('skip_file_generation', :bool, :default => false),
         'compression' => config.param('compression', :string, :default => 'NONE'),
@@ -273,11 +273,12 @@ module Embulk
           path_pattern = "#{task['path_prefix']}*#{task['file_ext']}"
           Embulk.logger.info { "embulk-output-bigquery: Skip file generation. Get paths from `#{path_pattern}`" }
           paths = Dir.glob(path_pattern)
-          task_reports = paths.map {|path| { '
+          task_reports = paths.map {|path| { 'num_input_rows' => 0 } }
         else
           task_reports = yield(task) # generates local files
           Embulk.logger.info { "embulk-output-bigquery: task_reports: #{task_reports.to_json}" }
-          paths =
+          paths = FileWriter.paths
+          FileWriter.ios.each {|io| io.close rescue nil }
         end
 
         if task['skip_load'] # only for debug
@@ -294,11 +295,9 @@ module Embulk
 
         if task['temp_table']
           if task['mode'] == 'append'
-            bigquery.copy(task['temp_table'], task['table'],
-                          write_disposition: 'WRITE_APPEND')
+            bigquery.copy(task['temp_table'], task['table'], write_disposition: 'WRITE_APPEND')
          else # replace or replace_backup
-            bigquery.copy(task['temp_table'], task['table'],
-                          write_disposition: 'WRITE_TRUNCATE')
+            bigquery.copy(task['temp_table'], task['table'], write_disposition: 'WRITE_TRUNCATE')
          end
         end
       end
@@ -351,11 +350,11 @@ module Embulk
       def add(page)
         if task['with_rehearsal'] and @index == 0 and !@rehearsaled
           page = page.to_a # to avoid https://github.com/embulk/embulk/issues/403
-          if @num_rows
+          if @num_rows >= task['rehearsal_counts']
             Embulk.logger.info { "embulk-output-bigquery: Rehearsal started" }
             begin
               @bigquery.create_table(task['rehearsal_table'])
-              @bigquery.load(
+              @bigquery.load(FileWriter.paths.first, task['rehearsal_table'])
             ensure
               @bigquery.delete_table(task['rehearsal_table'])
             end
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -245,7 +245,7 @@ module Embulk
           status = _response.status.state
           if status == "DONE"
             Embulk.logger.info {
-              "embulk-output-bigquery: #{kind} job completed
+              "embulk-output-bigquery: #{kind} job completed... " \
               "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
             }
             break
data/lib/embulk/output/bigquery/file_writer.rb
CHANGED
@@ -7,8 +7,6 @@ module Embulk
   module Output
     class Bigquery < OutputPlugin
       class FileWriter
-        attr_reader :path
-
         def initialize(task, schema, index, converters = nil)
           @task = task
           @schema = schema
@@ -30,21 +28,49 @@ module Embulk
               @formatter_proc = self.method(:to_jsonl)
             end
           end
+        end
+
+        def self.reset_ios
+          @ios = Set.new
+        end
+
+        def self.ios
+          @ios ||= Set.new
+        end
+
+        def self.paths
+          ios.map {|io| io.path }
+        end
+
+        THREAD_LOCAL_IO_KEY = :embulk_output_bigquery_file_writer_io
+
+        # Create one io object for one output thread, that is, share among tasks
+        # Close theses shared io objects in transaction
+        #
+        # Thread IO must be created at #add because threads in #initialize or #commit
+        # are different (called from non-output threads). Note also that #add of the
+        # same instance would be called in different output threads
+        def thread_io
+          return Thread.current[THREAD_LOCAL_IO_KEY] if Thread.current[THREAD_LOCAL_IO_KEY]
 
-
-
-
-
-
+          path = sprintf(
+            "#{@task['path_prefix']}#{@task['sequence_format']}#{@task['file_ext']}",
+            Process.pid, Thread.current.object_id
+          )
+          if File.exist?(path)
+            Embulk.logger.warn { "embulk-output-bigquery: unlink already existing #{path}" }
+            File.unlink(path) rescue nil
           end
-
+          Embulk.logger.info { "embulk-output-bigquery: create #{path}" }
+          file_io = File.open(path, 'w')
 
           case @task['compression'].downcase
           when 'gzip'
-
+            io = Zlib::GzipWriter.new(file_io)
          else
-
+            io = file_io
          end
+          Thread.current[THREAD_LOCAL_IO_KEY] = io
        end
 
        def to_payload(record)
@@ -72,13 +98,15 @@ module Embulk
        end
 
        def add(page)
+          io = thread_io
+          self.class.ios.add(io)
          # I once tried to split IO writing into another IO thread using SizedQueue
          # However, it resulted in worse performance, so I removed the codes.
          page.each do |record|
            Embulk.logger.trace { "embulk-output-bigquery: record #{record}" }
            formatted_record = @formatter_proc.call(record)
            Embulk.logger.trace { "embulk-output-bigquery: formatted_record #{formatted_record.chomp}" }
-
+            io.write formatted_record
            @num_input_rows += 1
          end
          now = Time.now
@@ -91,10 +119,8 @@ module Embulk
        end
 
        def commit
-          @io.close rescue nil
          task_report = {
            'num_input_rows' => @num_input_rows,
-            'path' => @path,
          }
        end
      end
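The comment block added in the hunk above captures the key design change of this release: one io object per output thread, stored in a thread-local slot and created lazily in `#add`, because `#initialize` and `#commit` run on different (non-output) threads, and the shared ios are closed later in the transaction. A stripped-down sketch of that pattern follows; the class and method names (`ThreadLocalWriter`, `KEY`, `open_io`) are illustrative, not the plugin's actual code:

```ruby
require 'set'

# Minimal sketch of a lazily-created, per-thread IO, mirroring FileWriter's approach.
class ThreadLocalWriter
  KEY = :thread_local_writer_io

  def self.ios
    @ios ||= Set.new   # every io handed out, so the caller can close them all at the end
  end

  def write(line)
    io = (Thread.current[KEY] ||= open_io)  # created on the writing thread itself
    self.class.ios.add(io)
    io.write(line)
  end

  private

  def open_io
    # One file per thread: pid and thread object id keep the names unique.
    File.open(sprintf("/tmp/out.%d.%d", Process.pid, Thread.current.object_id), 'w')
  end
end

# Usage: each worker thread gets its own file; close them all once the work is done.
writer  = ThreadLocalWriter.new
threads = 2.times.map { Thread.new { 10.times { writer.write("row\n") } } }
threads.each(&:join)
ThreadLocalWriter.ios.each { |io| io.close rescue nil }
```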
data/test/test_configure.rb
CHANGED
@@ -74,7 +74,7 @@ module Embulk
       assert_equal 5, task['retries']
       assert_equal "Embulk BigQuery plugin", task['application_name']
       # assert_equal "/tmp/embulk_output_bigquery_20160228-27184-pubcn0", task['path_prefix']
-      assert_equal ".%d.%
+      assert_equal ".%d.%d", task['sequence_format']
       assert_equal ".csv", task['file_ext']
       assert_equal false, task['skip_file_generation']
       assert_equal "NONE", task['compression']
data/test/test_example.rb
CHANGED
@@ -20,6 +20,7 @@ if ENV['CONNECT']
     files = files.reject {|file| File.symlink?(file) }
     # files.shift
     files.each do |config_path|
+      next if File.basename(config_path) == 'config_expose_errors.yml'
       define_method(:"test_#{File.basename(config_path, ".yml")}") do
         success = Bundler.with_clean_env do
           cmd = "#{embulk_path} run -X page_size=1 -b . -l trace #{config_path}"
data/test/test_file_writer.rb
CHANGED
@@ -16,6 +16,11 @@ module Embulk
         end
       end
 
+      def setup
+        Thread.current[FileWriter::THREAD_LOCAL_IO_KEY] = nil
+        FileWriter.reset_ios
+      end
+
       def default_task
         {
           'compression' => 'GZIP',
@@ -42,19 +47,30 @@ module Embulk
         @converters ||= ValueConverterFactory.create_converters(default_task, schema)
       end
 
+      def record
+        [true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
+      end
+
+      def page
+        [record]
+      end
+
       sub_test_case "path" do
         def test_path
           task = default_task.merge('path_prefix' => 'tmp/foo', 'sequence_format' => '', 'file_ext' => '.1')
           file_writer = FileWriter.new(task, schema, 0, converters)
-
+
+          begin
+            file_writer.add(page)
+          ensure
+            io.close rescue nil
+          end
+          path = FileWriter.paths.first
+          assert_equal 'tmp/foo.1', path
         end
       end
 
       sub_test_case "formatter" do
-        def record
-          [true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
-        end
-
         def test_payload_column_index
           task = default_task.merge('payload_column_index' => 0)
           file_writer = FileWriter.new(task, schema, 0, converters)
@@ -86,42 +102,36 @@ module Embulk
         end
       end
 
       sub_test_case "compression" do
-        def record
-          [true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
-        end
-
-        def page
-          [record]
-        end
-
         def test_gzip
           task = default_task.merge('compression' => 'GZIP')
           file_writer = FileWriter.new(task, schema, 0, converters)
-          io = file_writer.instance_variable_get(:@io)
-          assert_equal Zlib::GzipWriter, io.class
 
           begin
             file_writer.add(page)
+            io = FileWriter.ios.first
+            assert_equal Zlib::GzipWriter, io.class
           ensure
-
+            io.close rescue nil
           end
-
-
+          path = FileWriter.paths.first
+          assert_true File.exist?(path)
+          assert_nothing_raised { Zlib::GzipReader.open(path) {|gz| } }
         end
 
         def test_uncompressed
           task = default_task.merge('compression' => 'NONE')
           file_writer = FileWriter.new(task, schema, 0, converters)
-          io = file_writer.instance_variable_get(:@io)
-          assert_equal File, io.class
 
           begin
             file_writer.add(page)
+            io = FileWriter.ios.first
+            assert_equal File, io.class
           ensure
-
+            io.close rescue nil
          end
-
-
+          path = FileWriter.paths.first
+          assert_true File.exist?(path)
+          assert_raise { Zlib::GzipReader.open(path) {|gz| } }
         end
       end
     end
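The reworked compression tests above no longer peek at an `@io` instance variable; they verify the written file itself: a gzip-compressed file opens cleanly with `Zlib::GzipReader`, while an uncompressed one raises. A standalone sketch of that check (file names and the `gzipped?` helper are illustrative, not part of the gem):

```ruby
require 'zlib'

# Returns true if the file at `path` starts with a valid gzip stream,
# mirroring the assert_nothing_raised / assert_raise pair in the tests above.
def gzipped?(path)
  Zlib::GzipReader.open(path) { |gz| gz.read(1) }
  true
rescue Zlib::GzipFile::Error
  false
end

Zlib::GzipWriter.open('/tmp/sample.gz') { |gz| gz.write("row\n") }
File.write('/tmp/sample.txt', "row\n")

puts gzipped?('/tmp/sample.gz')   # => true
puts gzipped?('/tmp/sample.txt')  # => false
```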
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.3.0.pre2
+  version: 0.3.0.pre3
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-03-
+date: 2016-03-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: google-api-client
@@ -104,6 +104,8 @@ files:
 - example/config_guess_with_column_options.yml
 - example/config_gzip.yml
 - example/config_jsonl.yml
+- example/config_max_threads.yml
+- example/config_min_ouput_tasks.yml
 - example/config_mode_append.yml
 - example/config_mode_append_direct.yml
 - example/config_payload_column.yml
@@ -119,6 +121,12 @@ files:
 - example/example.csv
 - example/example.jsonl
 - example/example.yml
+- example/example2_1.csv
+- example/example2_2.csv
+- example/example4_1.csv
+- example/example4_2.csv
+- example/example4_3.csv
+- example/example4_4.csv
 - example/json_key.json
 - example/nested_example.jsonl
 - example/schema.json