embulk-output-bigquery 0.3.0.pre2 → 0.3.0.pre3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6d94d238ce16c412168d14a2d598c4858c2517a1
4
- data.tar.gz: 98c6a413ce767acc0db2a1b4b6fa8f1f43f1d7db
3
+ metadata.gz: f1dbd4398ccaccaf5b4d2d812dcbf64d2610c41c
4
+ data.tar.gz: 3b1ad06791c8b6b65139dfaf7d82e7e1f253c35d
5
5
  SHA512:
6
- metadata.gz: d5aee1620e2171c7ca84101c196bb85015bd53d1a5816949353e3e921559dcc96211a81c1c9d6a35a563de2c4656ed98c8b495577ba57d1a8e74e046bb9ab594
7
- data.tar.gz: 50f32129feea6bb33c7574e0d7aa73e387e37ced9ebe5d96a550225de78bddb365ab603b490a0a8361b008f16332a1092e84c278cf25e21a168700b8a751023a
6
+ metadata.gz: 5307cce2d9983b7c2710180f28e0e19515d6895dcd3b5d6e0c48503c57b5e555332831ed328a21daf9743e7688d7d3a51e1f81e4436370311e8defdd1dbc6b22
7
+ data.tar.gz: eeae7ee0e52c2e218478d5d58d3a2eeb76fedb02e5f6d1ba85a393069dbf1c455028658593b3dcedeff7f6dced4de9c31185e33d2fde824bfc9d4b3494acbc09
data/README.md CHANGED
@@ -65,7 +65,7 @@ Options for intermediate local files
65
65
  | name | type | required? | default | description |
66
66
  |:-------------------------------------|:------------|:-----------|:-------------------------|:-----------------------|
67
67
  | path_prefix | string | optional | | Path prefix of local files such as "/tmp/prefix_". Default randomly generates with [tempfile](http://ruby-doc.org/stdlib-2.2.3/libdoc/tempfile/rdoc/Tempfile.html) |
68
- | sequence_format | string | optional | .%d.%03d | Sequence format for pid, task index |
68
+ | sequence_format | string | optional | .%d.%d | Sequence format for pid, thread id |
69
69
  | file_ext | string | optional | | The file extension of local files such as ".csv.gz" ".json.gz". Default automatically generates from `source_format` and `compression`|
70
70
  | skip_file_generation | boolean | optional | | Load already generated local files into BigQuery if available. Specify correct path_prefix and file_ext. |
71
71
  | delete_from_local_when_job_end | boolean | optional | false | If set to true, delete local files when the job ends |
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-bigquery"
3
- spec.version = "0.3.0.pre2"
3
+ spec.version = "0.3.0.pre3"
4
4
  spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
5
5
  spec.summary = "Google BigQuery output plugin for Embulk"
6
6
  spec.description = "Embulk plugin that insert records to Google BigQuery."
@@ -0,0 +1,34 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/example4_
4
+ parser:
5
+ type: csv
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ null_string: 'NULL'
9
+ skip_header_lines: 1
10
+ comment_line_marker: '#'
11
+ columns:
12
+ - {name: date, type: string}
13
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
+ - {name: "null", type: string}
15
+ - {name: long, type: long}
16
+ - {name: string, type: string}
17
+ - {name: double, type: double}
18
+ - {name: boolean, type: boolean}
19
+ out:
20
+ type: bigquery
21
+ mode: replace
22
+ auth_method: json_key
23
+ json_keyfile: example/your-project-000.json
24
+ dataset: your_dataset_name
25
+ table: your_table_name
26
+ source_format: NEWLINE_DELIMITED_JSON
27
+ compression: NONE
28
+ auto_create_dataset: true
29
+ auto_create_table: true
30
+ schema_file: example/schema.json
31
+ exec:
32
+ type: local
33
+ min_output_tasks: 2
34
+ max_threads: 2
@@ -0,0 +1,34 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/example2_
4
+ parser:
5
+ type: csv
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ null_string: 'NULL'
9
+ skip_header_lines: 1
10
+ comment_line_marker: '#'
11
+ columns:
12
+ - {name: date, type: string}
13
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
+ - {name: "null", type: string}
15
+ - {name: long, type: long}
16
+ - {name: string, type: string}
17
+ - {name: double, type: double}
18
+ - {name: boolean, type: boolean}
19
+ out:
20
+ type: bigquery
21
+ mode: replace
22
+ auth_method: json_key
23
+ json_keyfile: example/your-project-000.json
24
+ dataset: your_dataset_name
25
+ table: your_table_name
26
+ source_format: NEWLINE_DELIMITED_JSON
27
+ compression: GZIP
28
+ auto_create_dataset: true
29
+ auto_create_table: true
30
+ schema_file: example/schema.json
31
+ exec:
32
+ type: local
33
+ min_output_tasks: 8
34
+ max_threads: 4
@@ -0,0 +1,17 @@
1
+ date,timestamp,null,long,string,double,boolean
2
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
3
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
4
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
5
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
6
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
7
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
8
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
9
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
10
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
11
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
12
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
13
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
14
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
15
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
16
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
17
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
@@ -0,0 +1,17 @@
1
+ date,timestamp,null,long,string,double,boolean
2
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
3
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
4
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
5
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
6
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
7
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
8
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
9
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
10
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
11
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
12
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
13
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
14
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
15
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
16
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
17
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
@@ -0,0 +1,17 @@
1
+ date,timestamp,null,long,string,double,boolean
2
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
3
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
4
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
5
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
6
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
7
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
8
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
9
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
10
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
11
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
12
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
13
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
14
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
15
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
16
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
17
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
@@ -0,0 +1,17 @@
1
+ date,timestamp,null,long,string,double,boolean
2
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
3
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
4
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
5
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
6
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
7
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
8
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
9
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
10
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
11
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
12
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
13
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
14
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
15
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
16
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
17
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
@@ -0,0 +1,17 @@
1
+ date,timestamp,null,long,string,double,boolean
2
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
3
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
4
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
5
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
6
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
7
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
8
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
9
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
10
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
11
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
12
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
13
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
14
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
15
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
16
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
17
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
@@ -0,0 +1,17 @@
1
+ date,timestamp,null,long,string,double,boolean
2
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,true
3
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,true
4
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,true
5
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,true
6
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,true
7
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,true
8
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,true
9
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,true
10
+ 2015-07-13,2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,false
11
+ 2015-07-13,2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,false
12
+ 2015-07-13,2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,false
13
+ 2015-07-13,2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,false
14
+ 2015-07-13,2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,false
15
+ 2015-07-13,2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,false
16
+ 2015-07-13,2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,false
17
+ 2015-07-13,2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,false
@@ -63,7 +63,7 @@ module Embulk
63
63
  'application_name' => config.param('application_name', :string, :default => 'Embulk BigQuery plugin'),
64
64
 
65
65
  'path_prefix' => config.param('path_prefix', :string, :default => nil),
66
- 'sequence_format' => config.param('sequence_format', :string, :default => '.%d.%03d'),
66
+ 'sequence_format' => config.param('sequence_format', :string, :default => '.%d.%d'),
67
67
  'file_ext' => config.param('file_ext', :string, :default => nil),
68
68
  'skip_file_generation' => config.param('skip_file_generation', :bool, :default => false),
69
69
  'compression' => config.param('compression', :string, :default => 'NONE'),
@@ -273,11 +273,12 @@ module Embulk
273
273
  path_pattern = "#{task['path_prefix']}*#{task['file_ext']}"
274
274
  Embulk.logger.info { "embulk-output-bigquery: Skip file generation. Get paths from `#{path_pattern}`" }
275
275
  paths = Dir.glob(path_pattern)
276
- task_reports = paths.map {|path| { 'path' => path, 'num_input_rows' => 0 } }
276
+ task_reports = paths.map {|path| { 'num_input_rows' => 0 } }
277
277
  else
278
278
  task_reports = yield(task) # generates local files
279
279
  Embulk.logger.info { "embulk-output-bigquery: task_reports: #{task_reports.to_json}" }
280
- paths = task_reports.map {|report| report['path'] }
280
+ paths = FileWriter.paths
281
+ FileWriter.ios.each {|io| io.close rescue nil }
281
282
  end
282
283
 
283
284
  if task['skip_load'] # only for debug
@@ -294,11 +295,9 @@ module Embulk
294
295
 
295
296
  if task['temp_table']
296
297
  if task['mode'] == 'append'
297
- bigquery.copy(task['temp_table'], task['table'],
298
- write_disposition: 'WRITE_APPEND')
298
+ bigquery.copy(task['temp_table'], task['table'], write_disposition: 'WRITE_APPEND')
299
299
  else # replace or replace_backup
300
- bigquery.copy(task['temp_table'], task['table'],
301
- write_disposition: 'WRITE_TRUNCATE')
300
+ bigquery.copy(task['temp_table'], task['table'], write_disposition: 'WRITE_TRUNCATE')
302
301
  end
303
302
  end
304
303
  end
@@ -351,11 +350,11 @@ module Embulk
351
350
  def add(page)
352
351
  if task['with_rehearsal'] and @index == 0 and !@rehearsaled
353
352
  page = page.to_a # to avoid https://github.com/embulk/embulk/issues/403
354
- if @num_rows > task['rehearsal_counts']
353
+ if @num_rows >= task['rehearsal_counts']
355
354
  Embulk.logger.info { "embulk-output-bigquery: Rehearsal started" }
356
355
  begin
357
356
  @bigquery.create_table(task['rehearsal_table'])
358
- @bigquery.load(@file_writer.path, task['rehearsal_table'])
357
+ @bigquery.load(FileWriter.paths.first, task['rehearsal_table'])
359
358
  ensure
360
359
  @bigquery.delete_table(task['rehearsal_table'])
361
360
  end
@@ -245,7 +245,7 @@ module Embulk
245
245
  status = _response.status.state
246
246
  if status == "DONE"
247
247
  Embulk.logger.info {
248
- "embulk-output-bigquery: #{kind} job completed successfully... " \
248
+ "embulk-output-bigquery: #{kind} job completed... " \
249
249
  "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
250
250
  }
251
251
  break
@@ -7,8 +7,6 @@ module Embulk
7
7
  module Output
8
8
  class Bigquery < OutputPlugin
9
9
  class FileWriter
10
- attr_reader :path
11
-
12
10
  def initialize(task, schema, index, converters = nil)
13
11
  @task = task
14
12
  @schema = schema
@@ -30,21 +28,49 @@ module Embulk
30
28
  @formatter_proc = self.method(:to_jsonl)
31
29
  end
32
30
  end
31
+ end
32
+
33
+ def self.reset_ios
34
+ @ios = Set.new
35
+ end
36
+
37
+ def self.ios
38
+ @ios ||= Set.new
39
+ end
40
+
41
+ def self.paths
42
+ ios.map {|io| io.path }
43
+ end
44
+
45
+ THREAD_LOCAL_IO_KEY = :embulk_output_bigquery_file_writer_io
46
+
47
+ # Create one io object per output thread, that is, shared among tasks
48
+ # Close these shared io objects in transaction
49
+ #
50
+ # Thread IO must be created at #add because threads in #initialize or #commit
51
+ # are different (called from non-output threads). Note also that #add of the
52
+ # same instance would be called in different output threads
53
+ def thread_io
54
+ return Thread.current[THREAD_LOCAL_IO_KEY] if Thread.current[THREAD_LOCAL_IO_KEY]
33
55
 
34
- @path = sprintf("#{@task['path_prefix']}#{@task['sequence_format']}#{@task['file_ext']}", Process.pid, index)
35
- Embulk.logger.info { "embulk-output-bigquery: will create #{@path}" }
36
- if File.exist?(@path)
37
- Embulk.logger.warn { "embulk-output-bigquery: unlink already existing #{@path}" }
38
- File.unlink(@path) rescue nil
56
+ path = sprintf(
57
+ "#{@task['path_prefix']}#{@task['sequence_format']}#{@task['file_ext']}",
58
+ Process.pid, Thread.current.object_id
59
+ )
60
+ if File.exist?(path)
61
+ Embulk.logger.warn { "embulk-output-bigquery: unlink already existing #{path}" }
62
+ File.unlink(path) rescue nil
39
63
  end
40
- @file_io = File.open(@path, 'w')
64
+ Embulk.logger.info { "embulk-output-bigquery: create #{path}" }
65
+ file_io = File.open(path, 'w')
41
66
 
42
67
  case @task['compression'].downcase
43
68
  when 'gzip'
44
- @io = Zlib::GzipWriter.new(@file_io)
69
+ io = Zlib::GzipWriter.new(file_io)
45
70
  else
46
- @io = @file_io
71
+ io = file_io
47
72
  end
73
+ Thread.current[THREAD_LOCAL_IO_KEY] = io
48
74
  end
49
75
 
50
76
  def to_payload(record)
@@ -72,13 +98,15 @@ module Embulk
72
98
  end
73
99
 
74
100
  def add(page)
101
+ io = thread_io
102
+ self.class.ios.add(io)
75
103
  # I once tried to split IO writing into another IO thread using SizedQueue
76
104
  # However, it resulted in worse performance, so I removed the codes.
77
105
  page.each do |record|
78
106
  Embulk.logger.trace { "embulk-output-bigquery: record #{record}" }
79
107
  formatted_record = @formatter_proc.call(record)
80
108
  Embulk.logger.trace { "embulk-output-bigquery: formatted_record #{formatted_record.chomp}" }
81
- @io.write formatted_record
109
+ io.write formatted_record
82
110
  @num_input_rows += 1
83
111
  end
84
112
  now = Time.now
@@ -91,10 +119,8 @@ module Embulk
91
119
  end
92
120
 
93
121
  def commit
94
- @io.close rescue nil
95
122
  task_report = {
96
123
  'num_input_rows' => @num_input_rows,
97
- 'path' => @path,
98
124
  }
99
125
  end
100
126
  end
@@ -74,7 +74,7 @@ module Embulk
74
74
  assert_equal 5, task['retries']
75
75
  assert_equal "Embulk BigQuery plugin", task['application_name']
76
76
  # assert_equal "/tmp/embulk_output_bigquery_20160228-27184-pubcn0", task['path_prefix']
77
- assert_equal ".%d.%03d", task['sequence_format']
77
+ assert_equal ".%d.%d", task['sequence_format']
78
78
  assert_equal ".csv", task['file_ext']
79
79
  assert_equal false, task['skip_file_generation']
80
80
  assert_equal "NONE", task['compression']
data/test/test_example.rb CHANGED
@@ -20,6 +20,7 @@ if ENV['CONNECT']
20
20
  files = files.reject {|file| File.symlink?(file) }
21
21
  # files.shift
22
22
  files.each do |config_path|
23
+ next if File.basename(config_path) == 'config_expose_errors.yml'
23
24
  define_method(:"test_#{File.basename(config_path, ".yml")}") do
24
25
  success = Bundler.with_clean_env do
25
26
  cmd = "#{embulk_path} run -X page_size=1 -b . -l trace #{config_path}"
@@ -16,6 +16,11 @@ module Embulk
16
16
  end
17
17
  end
18
18
 
19
+ def setup
20
+ Thread.current[FileWriter::THREAD_LOCAL_IO_KEY] = nil
21
+ FileWriter.reset_ios
22
+ end
23
+
19
24
  def default_task
20
25
  {
21
26
  'compression' => 'GZIP',
@@ -42,19 +47,30 @@ module Embulk
42
47
  @converters ||= ValueConverterFactory.create_converters(default_task, schema)
43
48
  end
44
49
 
50
+ def record
51
+ [true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
52
+ end
53
+
54
+ def page
55
+ [record]
56
+ end
57
+
45
58
  sub_test_case "path" do
46
59
  def test_path
47
60
  task = default_task.merge('path_prefix' => 'tmp/foo', 'sequence_format' => '', 'file_ext' => '.1')
48
61
  file_writer = FileWriter.new(task, schema, 0, converters)
49
- assert_equal 'tmp/foo.1', file_writer.instance_variable_get(:@path)
62
+
63
+ begin
64
+ file_writer.add(page)
65
+ ensure
66
+ io.close rescue nil
67
+ end
68
+ path = FileWriter.paths.first
69
+ assert_equal 'tmp/foo.1', path
50
70
  end
51
71
  end
52
72
 
53
73
  sub_test_case "formatter" do
54
- def record
55
- [true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
56
- end
57
-
58
74
  def test_payload_column_index
59
75
  task = default_task.merge('payload_column_index' => 0)
60
76
  file_writer = FileWriter.new(task, schema, 0, converters)
@@ -86,42 +102,36 @@ module Embulk
86
102
  end
87
103
 
88
104
  sub_test_case "compression" do
89
- def record
90
- [true, 1, 1.1, 'foo', Time.parse("2016-02-26 00:00:00 +09:00"), {"foo"=>"foo"}]
91
- end
92
-
93
- def page
94
- [record]
95
- end
96
-
97
105
  def test_gzip
98
106
  task = default_task.merge('compression' => 'GZIP')
99
107
  file_writer = FileWriter.new(task, schema, 0, converters)
100
- io = file_writer.instance_variable_get(:@io)
101
- assert_equal Zlib::GzipWriter, io.class
102
108
 
103
109
  begin
104
110
  file_writer.add(page)
111
+ io = FileWriter.ios.first
112
+ assert_equal Zlib::GzipWriter, io.class
105
113
  ensure
106
- file_writer.commit
114
+ io.close rescue nil
107
115
  end
108
- assert_true File.exist?(file_writer.path)
109
- assert_nothing_raised { Zlib::GzipReader.open(file_writer.path) {|gz| } }
116
+ path = FileWriter.paths.first
117
+ assert_true File.exist?(path)
118
+ assert_nothing_raised { Zlib::GzipReader.open(path) {|gz| } }
110
119
  end
111
120
 
112
121
  def test_uncompressed
113
122
  task = default_task.merge('compression' => 'NONE')
114
123
  file_writer = FileWriter.new(task, schema, 0, converters)
115
- io = file_writer.instance_variable_get(:@io)
116
- assert_equal File, io.class
117
124
 
118
125
  begin
119
126
  file_writer.add(page)
127
+ io = FileWriter.ios.first
128
+ assert_equal File, io.class
120
129
  ensure
121
- file_writer.commit
130
+ io.close rescue nil
122
131
  end
123
- assert_true File.exist?(file_writer.path)
124
- assert_raise { Zlib::GzipReader.open(file_writer.path) {|gz| } }
132
+ path = FileWriter.paths.first
133
+ assert_true File.exist?(path)
134
+ assert_raise { Zlib::GzipReader.open(path) {|gz| } }
125
135
  end
126
136
  end
127
137
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0.pre2
4
+ version: 0.3.0.pre3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-03-18 00:00:00.000000000 Z
12
+ date: 2016-03-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: google-api-client
@@ -104,6 +104,8 @@ files:
104
104
  - example/config_guess_with_column_options.yml
105
105
  - example/config_gzip.yml
106
106
  - example/config_jsonl.yml
107
+ - example/config_max_threads.yml
108
+ - example/config_min_ouput_tasks.yml
107
109
  - example/config_mode_append.yml
108
110
  - example/config_mode_append_direct.yml
109
111
  - example/config_payload_column.yml
@@ -119,6 +121,12 @@ files:
119
121
  - example/example.csv
120
122
  - example/example.jsonl
121
123
  - example/example.yml
124
+ - example/example2_1.csv
125
+ - example/example2_2.csv
126
+ - example/example4_1.csv
127
+ - example/example4_2.csv
128
+ - example/example4_3.csv
129
+ - example/example4_4.csv
122
130
  - example/json_key.json
123
131
  - example/nested_example.jsonl
124
132
  - example/schema.json