embulk-output-bigquery 0.3.0.pre2 → 0.3.0.pre3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 6d94d238ce16c412168d14a2d598c4858c2517a1
-  data.tar.gz: 98c6a413ce767acc0db2a1b4b6fa8f1f43f1d7db
+  metadata.gz: f1dbd4398ccaccaf5b4d2d812dcbf64d2610c41c
+  data.tar.gz: 3b1ad06791c8b6b65139dfaf7d82e7e1f253c35d
 SHA512:
-  metadata.gz: d5aee1620e2171c7ca84101c196bb85015bd53d1a5816949353e3e921559dcc96211a81c1c9d6a35a563de2c4656ed98c8b495577ba57d1a8e74e046bb9ab594
-  data.tar.gz: 50f32129feea6bb33c7574e0d7aa73e387e37ced9ebe5d96a550225de78bddb365ab603b490a0a8361b008f16332a1092e84c278cf25e21a168700b8a751023a
+  metadata.gz: 5307cce2d9983b7c2710180f28e0e19515d6895dcd3b5d6e0c48503c57b5e555332831ed328a21daf9743e7688d7d3a51e1f81e4436370311e8defdd1dbc6b22
+  data.tar.gz: eeae7ee0e52c2e218478d5d58d3a2eeb76fedb02e5f6d1ba85a393069dbf1c455028658593b3dcedeff7f6dced4de9c31185e33d2fde824bfc9d4b3494acbc09
data/README.md CHANGED
@@ -65,7 +65,7 @@ Options for intermediate local files
 | name                           | type    | required? | default  | description |
 |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
 | path_prefix                    | string  | optional  |          | Path prefix of local files such as "/tmp/prefix_". Default randomly generates with [tempfile](http://ruby-doc.org/stdlib-2.2.3/libdoc/tempfile/rdoc/Tempfile.html) |
-| sequence_format                | string  | optional  | .%d.%03d | Sequence format for pid, task index |
+| sequence_format                | string  | optional  | .%d.%d   | Sequence format for pid, thread id |
 | file_ext                       | string  | optional  |          | The file extension of local files such as ".csv.gz" ".json.gz". Default automatically generates from `source_format` and `compression` |
 | skip_file_generation           | boolean | optional  |          | Load already generated local files into BigQuery if available. Specify correct path_prefix and file_ext. |
 | delete_from_local_when_job_end | boolean | optional  | false    | If set to true, delete local files when the job ends |
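
The practical effect of this release's `sequence_format` change: intermediate files are now named per output thread rather than per task index. A minimal sketch of how the new default expands (the prefix and extension values here are hypothetical; the `sprintf` call mirrors the one in `file_writer.rb` below):

```ruby
# Sketch: how the new default sequence_format ('.%d.%d') builds a local
# file path from the process id and the output thread's object id.
path_prefix     = '/tmp/prefix_'   # hypothetical value
sequence_format = '.%d.%d'         # new default; old default '.%d.%03d' used (pid, task index)
file_ext        = '.csv'           # hypothetical value

path = sprintf("#{path_prefix}#{sequence_format}#{file_ext}",
               Process.pid, Thread.current.object_id)
# => e.g. "/tmp/prefix_.12345.69876543210980.csv"
```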
data/embulk-output-bigquery.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name          = "embulk-output-bigquery"
-  spec.version       = "0.3.0.pre2"
+  spec.version       = "0.3.0.pre3"
   spec.authors       = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary       = "Google BigQuery output plugin for Embulk"
   spec.description   = "Embulk plugin that insert records to Google BigQuery."
data/example/config_max_threads.yml ADDED
@@ -0,0 +1,34 @@
+in:
+  type: file
+  path_prefix: example/example4_
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+exec:
+  type: local
+  min_output_tasks: 2
+  max_threads: 2
data/example/config_min_ouput_tasks.yml ADDED
@@ -0,0 +1,34 @@
+in:
+  type: file
+  path_prefix: example/example2_
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: GZIP
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+exec:
+  type: local
+  min_output_tasks: 8
+  max_threads: 4
data/example/example2_1.csv ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
data/example/example2_2.csv ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
data/example/example4_1.csv ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
data/example/example4_2.csv ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
data/example/example4_3.csv ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
data/example/example4_4.csv ADDED
@@ -0,0 +1,17 @@
+date,timestamp,null,long,string,double,boolean
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
+2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
+2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
+2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
+2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
+2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
+2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
data/lib/embulk/output/bigquery.rb CHANGED
@@ -63,7 +63,7 @@ module Embulk
           'application_name' => config.param('application_name', :string, :default => 'Embulk BigQuery plugin'),
 
           'path_prefix' => config.param('path_prefix', :string, :default => nil),
-          'sequence_format' => config.param('sequence_format', :string, :default => '.%d.%03d'),
+          'sequence_format' => config.param('sequence_format', :string, :default => '.%d.%d'),
           'file_ext' => config.param('file_ext', :string, :default => nil),
           'skip_file_generation' => config.param('skip_file_generation', :bool, :default => false),
           'compression' => config.param('compression', :string, :default => 'NONE'),
@@ -273,11 +273,12 @@ module Embulk
         path_pattern = "#{task['path_prefix']}*#{task['file_ext']}"
         Embulk.logger.info { "embulk-output-bigquery: Skip file generation. Get paths from `#{path_pattern}`" }
         paths = Dir.glob(path_pattern)
-        task_reports = paths.map {|path| { 'path' => path, 'num_input_rows' => 0 } }
+        task_reports = paths.map {|path| { 'num_input_rows' => 0 } }
       else
         task_reports = yield(task) # generates local files
         Embulk.logger.info { "embulk-output-bigquery: task_reports: #{task_reports.to_json}" }
-        paths = task_reports.map {|report| report['path'] }
+        paths = FileWriter.paths
+        FileWriter.ios.each {|io| io.close rescue nil }
       end
 
       if task['skip_load'] # only for debug
@@ -294,11 +295,9 @@ module Embulk
 
       if task['temp_table']
         if task['mode'] == 'append'
-          bigquery.copy(task['temp_table'], task['table'],
-                        write_disposition: 'WRITE_APPEND')
+          bigquery.copy(task['temp_table'], task['table'], write_disposition: 'WRITE_APPEND')
         else # replace or replace_backup
-          bigquery.copy(task['temp_table'], task['table'],
-                        write_disposition: 'WRITE_TRUNCATE')
+          bigquery.copy(task['temp_table'], task['table'], write_disposition: 'WRITE_TRUNCATE')
         end
       end
     end
@@ -351,11 +350,11 @@ module Embulk
       def add(page)
         if task['with_rehearsal'] and @index == 0 and !@rehearsaled
           page = page.to_a # to avoid https://github.com/embulk/embulk/issues/403
-          if @num_rows > task['rehearsal_counts']
+          if @num_rows >= task['rehearsal_counts']
             Embulk.logger.info { "embulk-output-bigquery: Rehearsal started" }
             begin
               @bigquery.create_table(task['rehearsal_table'])
-              @bigquery.load(@file_writer.path, task['rehearsal_table'])
+              @bigquery.load(FileWriter.paths.first, task['rehearsal_table'])
             ensure
               @bigquery.delete_table(task['rehearsal_table'])
             end
data/lib/embulk/output/bigquery/bigquery_client.rb CHANGED
@@ -245,7 +245,7 @@ module Embulk
           status = _response.status.state
           if status == "DONE"
             Embulk.logger.info {
-              "embulk-output-bigquery: #{kind} job completed successfully... " \
+              "embulk-output-bigquery: #{kind} job completed... " \
               "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
             }
             break
data/lib/embulk/output/bigquery/file_writer.rb CHANGED
@@ -7,8 +7,6 @@ module Embulk
   module Output
     class Bigquery < OutputPlugin
       class FileWriter
-        attr_reader :path
-
         def initialize(task, schema, index, converters = nil)
           @task = task
           @schema = schema
@@ -30,21 +28,49 @@ module Embulk
             @formatter_proc = self.method(:to_jsonl)
           end
         end
+      end
+
+      def self.reset_ios
+        @ios = Set.new
+      end
+
+      def self.ios
+        @ios ||= Set.new
+      end
+
+      def self.paths
+        ios.map {|io| io.path }
+      end
+
+      THREAD_LOCAL_IO_KEY = :embulk_output_bigquery_file_writer_io
+
+      # Create one io object per output thread, that is, share it among tasks.
+      # Close these shared io objects in transaction.
+      #
+      # The thread io must be created in #add because #initialize and #commit
+      # run on different (non-output) threads. Note also that #add of the same
+      # instance may be called from different output threads.
+      def thread_io
+        return Thread.current[THREAD_LOCAL_IO_KEY] if Thread.current[THREAD_LOCAL_IO_KEY]
 
-        @path = sprintf("#{@task['path_prefix']}#{@task['sequence_format']}#{@task['file_ext']}", Process.pid, index)
-        Embulk.logger.info { "embulk-output-bigquery: will create #{@path}" }
-        if File.exist?(@path)
-          Embulk.logger.warn { "embulk-output-bigquery: unlink already existing #{@path}" }
-          File.unlink(@path) rescue nil
+        path = sprintf(
+          "#{@task['path_prefix']}#{@task['sequence_format']}#{@task['file_ext']}",
+          Process.pid, Thread.current.object_id
+        )
+        if File.exist?(path)
+          Embulk.logger.warn { "embulk-output-bigquery: unlink already existing #{path}" }
+          File.unlink(path) rescue nil
         end
-        @file_io = File.open(@path, 'w')
+        Embulk.logger.info { "embulk-output-bigquery: create #{path}" }
+        file_io = File.open(path, 'w')
 
         case @task['compression'].downcase
         when 'gzip'
-          @io = Zlib::GzipWriter.new(@file_io)
+          io = Zlib::GzipWriter.new(file_io)
         else
-          @io = @file_io
+          io = file_io
         end
+        Thread.current[THREAD_LOCAL_IO_KEY] = io
       end
 
       def to_payload(record)
@@ -72,13 +98,15 @@ module Embulk
       end
 
       def add(page)
+        io = thread_io
+        self.class.ios.add(io)
         # I once tried to split IO writing into another IO thread using SizedQueue.
        # However, it resulted in worse performance, so I removed that code.
         page.each do |record|
           Embulk.logger.trace { "embulk-output-bigquery: record #{record}" }
           formatted_record = @formatter_proc.call(record)
           Embulk.logger.trace { "embulk-output-bigquery: formatted_record #{formatted_record.chomp}" }
-          @io.write formatted_record
+          io.write formatted_record
           @num_input_rows += 1
         end
         now = Time.now
@@ -91,10 +119,8 @@ module Embulk
       end
 
       def commit
-        @io.close rescue nil
         task_report = {
           'num_input_rows' => @num_input_rows,
-          'path' => @path,
         }
       end
     end
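
To make the new lifecycle easier to follow, here is a self-contained sketch of the pattern the `FileWriter` hunks above introduce: each output thread lazily opens one io, reuses it across tasks via a thread-local, registers it in a class-level `Set`, and the transaction collects the paths and closes everything at the end. The class and key names below are simplified stand-ins, not the plugin's API:

```ruby
require 'set'

class ThreadLocalWriter
  KEY = :thread_local_writer_io  # hypothetical thread-local key

  # Class-level registry of every io opened by any output thread.
  def self.ios
    @ios ||= Set.new
  end

  def self.paths
    ios.map {|io| io.path }
  end

  # Reuse one io per thread; open it lazily on first write.
  def thread_io
    Thread.current[KEY] ||=
      File.open("/tmp/out.#{Process.pid}.#{Thread.current.object_id}", 'w')
  end

  def add(records)
    io = thread_io
    self.class.ios.add(io)  # Set de-duplicates repeated registration
    records.each {|r| io.write("#{r}\n") }
  end
end

# Transaction side (single thread), after all output threads have finished:
writer = ThreadLocalWriter.new
writer.add(%w[a b c])
paths = ThreadLocalWriter.paths
ThreadLocalWriter.ios.each {|io| io.close rescue nil }
puts paths.inspect
```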
data/test/test_configure.rb CHANGED
@@ -74,7 +74,7 @@ module Embulk
       assert_equal 5, task['retries']
       assert_equal "Embulk BigQuery plugin", task['application_name']
       # assert_equal "/tmp/embulk_output_bigquery_20160228-27184-pubcn0", task['path_prefix']
-      assert_equal ".%d.%03d", task['sequence_format']
+      assert_equal ".%d.%d", task['sequence_format']
       assert_equal ".csv", task['file_ext']
       assert_equal false, task['skip_file_generation']
       assert_equal "NONE", task['compression']
data/test/test_example.rb CHANGED
@@ -20,6 +20,7 @@ if ENV['CONNECT']
     files = files.reject {|file| File.symlink?(file) }
     # files.shift
     files.each do |config_path|
+      next if File.basename(config_path) == 'config_expose_errors.yml'
       define_method(:"test_#{File.basename(config_path, ".yml")}") do
         success = Bundler.with_clean_env do
           cmd = "#{embulk_path} run -X page_size=1 -b . -l trace #{config_path}"
data/test/test_file_writer.rb CHANGED
@@ -16,6 +16,11 @@ module Embulk
       end
     end
 
+    def setup
+      Thread.current[FileWriter::THREAD_LOCAL_IO_KEY] = nil
+      FileWriter.reset_ios
+    end
+
     def default_task
       {
         'compression' => 'GZIP',
@@ -42,19 +47,30 @@ module Embulk
       @converters ||= ValueConverterFactory.create_converters(default_task, schema)
     end
 
+    def record
+      [true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
+    end
+
+    def page
+      [record]
+    end
+
     sub_test_case "path" do
       def test_path
         task = default_task.merge('path_prefix' => 'tmp/foo', 'sequence_format' => '', 'file_ext' => '.1')
         file_writer = FileWriter.new(task, schema, 0, converters)
-        assert_equal 'tmp/foo.1', file_writer.instance_variable_get(:@path)
+
+        begin
+          file_writer.add(page)
+        ensure
+          io.close rescue nil
+        end
+        path = FileWriter.paths.first
+        assert_equal 'tmp/foo.1', path
       end
     end
 
     sub_test_case "formatter" do
-      def record
-        [true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
-      end
-
       def test_payload_column_index
         task = default_task.merge('payload_column_index' => 0)
         file_writer = FileWriter.new(task, schema, 0, converters)
86
102
  end
87
103
 
88
104
  sub_test_case "compression" do
89
- def record
90
- [true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
91
- end
92
-
93
- def page
94
- [record]
95
- end
96
-
97
105
  def test_gzip
98
106
  task = default_task.merge('compression' => 'GZIP')
99
107
  file_writer = FileWriter.new(task, schema, 0, converters)
100
- io = file_writer.instance_variable_get(:@io)
101
- assert_equal Zlib::GzipWriter, io.class
102
108
 
103
109
  begin
104
110
  file_writer.add(page)
111
+ io = FileWriter.ios.first
112
+ assert_equal Zlib::GzipWriter, io.class
105
113
  ensure
106
- file_writer.commit
114
+ io.close rescue nil
107
115
  end
108
- assert_true File.exist?(file_writer.path)
109
- assert_nothing_raised { Zlib::GzipReader.open(file_writer.path) {|gz| } }
116
+ path = FileWriter.paths.first
117
+ assert_true File.exist?(path)
118
+ assert_nothing_raised { Zlib::GzipReader.open(path) {|gz| } }
110
119
  end
111
120
 
112
121
  def test_uncompressed
113
122
  task = default_task.merge('compression' => 'NONE')
114
123
  file_writer = FileWriter.new(task, schema, 0, converters)
115
- io = file_writer.instance_variable_get(:@io)
116
- assert_equal File, io.class
117
124
 
118
125
  begin
119
126
  file_writer.add(page)
127
+ io = FileWriter.ios.first
128
+ assert_equal File, io.class
120
129
  ensure
121
- file_writer.commit
130
+ io.close rescue nil
122
131
  end
123
- assert_true File.exist?(file_writer.path)
124
- assert_raise { Zlib::GzipReader.open(file_writer.path) {|gz| } }
132
+ path = FileWriter.paths.first
133
+ assert_true File.exist?(path)
134
+ assert_raise { Zlib::GzipReader.open(path) {|gz| } }
125
135
  end
126
136
  end
127
137
  end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.3.0.pre2
+  version: 0.3.0.pre3
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-03-18 00:00:00.000000000 Z
+date: 2016-03-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: google-api-client
@@ -104,6 +104,8 @@ files:
 - example/config_guess_with_column_options.yml
 - example/config_gzip.yml
 - example/config_jsonl.yml
+- example/config_max_threads.yml
+- example/config_min_ouput_tasks.yml
 - example/config_mode_append.yml
 - example/config_mode_append_direct.yml
 - example/config_payload_column.yml
@@ -119,6 +121,12 @@ files:
 - example/example.csv
 - example/example.jsonl
 - example/example.yml
+- example/example2_1.csv
+- example/example2_2.csv
+- example/example4_1.csv
+- example/example4_2.csv
+- example/example4_3.csv
+- example/example4_4.csv
 - example/json_key.json
 - example/nested_example.jsonl
 - example/schema.json