csv2avro 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6eae97d5b2bf7476331128770ffee4d3b6d69d7a
4
- data.tar.gz: 554e64338b5950de37ccaad44927176f6922f94c
3
+ metadata.gz: 4102328d73046f50036d1a35848142d0d23e50dc
4
+ data.tar.gz: d7f59e943fd02106579e9f25b6bc5480e5243b03
5
5
  SHA512:
6
- metadata.gz: 3a70f269a7337d6dad0bd24528e9092b897217254610a8257db0fecc72d55dada505c68040b3a1309b3e8266473257f118a88231a54b5627c74ffb63c998d49c
7
- data.tar.gz: 01cc32197d34410522aed53d4682aa9c91b20a63ad9e09a80831cc7e6af6d5bfd1a972488bb7b52f263451222a0363f10e5ab8a07e09ab38a5154b46496ff93e
6
+ metadata.gz: e1e6e26a916c6beb65c25395e062fa8f45037171fcfc624798fb68417f257ac761fdb6ffd560dda251bee89953be59399f3e66ee0ce612e405cf352862cd4d30
7
+ data.tar.gz: e94c31792b3113edb64dfc1e72cb24868af3a446469df2ada734d6ef0210992c5739ad26ba485832fcfc37204193d1bbde59155df98a18c34b644cf65aac50f6
data/.gitignore CHANGED
@@ -4,7 +4,9 @@
4
4
  /_yardoc/
5
5
  /coverage/
6
6
  /doc/
7
+ /feeds/
7
8
  /pkg/
9
+ /schema/
8
10
  /spec/reports/
9
11
  /tmp/
10
12
  *.bundle
data/CHANGELOG.md CHANGED
@@ -3,6 +3,11 @@
3
3
  All notable changes to this project are documented in this file.
4
4
  This project adheres to [Semantic Versioning](http://semver.org/).
5
5
 
6
+ ## 1.0.1 (2015-06-12; [compare](https://github.com/sspinc/csv2avro/compare/1.0.0...1.0.1))
7
+
8
+ ### Fixed
9
+ * CSV parsing issues
10
+
6
11
  ## 1.0.0 (2015-06-05; [compare](https://github.com/sspinc/csv2avro/compare/0.4.0...1.0.0))
7
12
 
8
13
  ### Added
data/README.md CHANGED
@@ -8,12 +8,6 @@ Convert CSV files to Avro like a boss.
8
8
 
9
9
  or if you prefer to live on the edge, just clone this repository and build it from scratch.
10
10
 
11
- You can run the converter within a **Docker** container, you just need to pull the `sspinc/csv2avro` image.
12
-
13
- ```
14
- $ docker pull sspinc/csv2avro
15
- ```
16
-
17
11
  ## Usage
18
12
 
19
13
  ### Basic
@@ -24,12 +18,6 @@ This will process the data.csv file and creates a *data.avro* file and a *data.b
24
18
 
25
19
  You can override the bad-rows file location with the `--bad-rows [BAD_ROWS]` option.
26
20
 
27
- ### CSV2Avro in Docker
28
-
29
- ```
30
- $ docker run sspinc/csv2avro --help
31
- ```
32
-
33
21
  ### Streaming
34
22
  ```
35
23
  $ cat ./spec/support/data.csv | csv2avro --schema ./spec/support/schema.avsc --bad-rows ./spec/support/data.bad.csv > ./spec/support/data.avro
@@ -59,7 +47,7 @@ This will uncompress the file and converts it to avro, leaving the original file
59
47
  For a full list of available options, run `csv2avro --help`
60
48
  ```
61
49
  $ csv2avro --help
62
- Version 1.0.0 of CSV2Avro
50
+ Version 1.0.1 of CSV2Avro
63
51
  Usage: csv2avro [options] [file]
64
52
  -s, --schema SCHEMA A file containing the Avro schema. This value is required.
65
53
  -b, --bad-rows [BAD_ROWS] The output location of the bad rows file.
data/Rakefile CHANGED
@@ -16,26 +16,3 @@ RSpec::Core::RakeTask.new(:spec) do |task|
16
16
  end
17
17
 
18
18
  task :default => :spec
19
-
20
- namespace :docker do
21
- desc "Build docker image"
22
- task :build do
23
- sh "docker build -t sspinc/csv2avro:#{CSV2Avro::VERSION} ."
24
- minor_version = CSV2Avro::VERSION.sub(/\.[0-9]+$/, '')
25
- sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:#{minor_version}"
26
- major_version = minor_version.sub(/\.[0-9]+$/, '')
27
- sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:#{major_version}"
28
-
29
- sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:latest"
30
- end
31
-
32
- desc "Run specs inside docker image"
33
- task :spec => :build do
34
- sh "docker run -t --entrypoint=rake sspinc/csv2avro:#{CSV2Avro::VERSION} spec"
35
- end
36
-
37
- desc "Push docker image"
38
- task :push => :spec do
39
- sh "docker push sspinc/csv2avro"
40
- end
41
- end
data/csv2avro.gemspec CHANGED
@@ -24,5 +24,5 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "pry", "~> 0.10"
25
25
  spec.add_development_dependency "bump", "~> 0.5"
26
26
 
27
- spec.add_dependency "avro", "~> 1.7"
27
+ spec.add_runtime_dependency "avro", "~> 1.7"
28
28
  end
data/lib/avro_schema.rb CHANGED
@@ -47,7 +47,7 @@ module Avro
47
47
  if datum.nil? && expected_type != :null
48
48
  @errors << "Missing value at #{name}"
49
49
  else
50
- @errors << "'#{datum}' at #{name} does'n match the type '#{expected_schema.to_s}'"
50
+ @errors << "'#{datum}' at #{name} doesn't match the type '#{expected_schema.to_s}'"
51
51
  end
52
52
  end
53
53
 
@@ -17,11 +17,11 @@ class CSV2Avro
17
17
  end
18
18
 
19
19
  def writer_schema
20
- avro_writer.datum_writer.writers_schema
20
+ @avro_writer.datum_writer.writers_schema
21
21
  end
22
22
 
23
23
  def write(hash)
24
- avro_writer << hash
24
+ @avro_writer << hash
25
25
  end
26
26
  end
27
27
  end
@@ -4,122 +4,120 @@ require 'csv'
4
4
 
5
5
  class CSV2Avro
6
6
  class Converter
7
- attr_reader :writer, :bad_rows_writer, :error_writer, :schema, :reader, :csv_options, :converter_options, :header_row, :column_separator
8
-
9
7
  def initialize(reader, writer, bad_rows_writer, error_writer, options, schema: schema)
8
+ @reader = reader
10
9
  @writer = writer
11
10
  @bad_rows_writer = bad_rows_writer
12
11
  @error_writer = error_writer
12
+ @options = options
13
13
  @schema = schema
14
14
 
15
- @column_separator = options[:delimiter] || ','
16
-
17
- @reader = reader
18
- @header_row = reader.readline.strip
19
- header = header_row.split(column_separator)
20
-
21
- init_header_converter
22
- @csv_options = {
23
- headers: header,
24
- skip_blanks: true,
25
- col_sep: column_separator,
26
- header_converters: :aliases
27
- }
28
-
29
- @converter_options = options
15
+ # read header row explicitly
16
+ @header = @reader.readline.strip.split(col_sep)
30
17
  end
31
18
 
32
19
  def convert
33
- defaults = schema.defaults if converter_options[:write_defaults]
20
+ csv.each do |row|
21
+ hash = row.to_hash
34
22
 
35
- fields_to_convert = schema.types.reject{ |key, value| value == :string }
23
+ add_defaults_to_hash!(hash) if @options[:write_defaults]
24
+ convert_fields!(hash)
36
25
 
37
- reader.each do |line|
38
- CSV.parse(line, csv_options) do |row|
39
- row = row.to_hash
26
+ begin
27
+ @writer.write(hash)
28
+ rescue Avro::IO::AvroTypeError
29
+ bad_rows_csv << row
40
30
 
41
- if converter_options[:write_defaults]
42
- add_defaults_to_row!(row, defaults)
31
+ until Avro::Schema.errors.empty? do
32
+ @error_writer.puts("line #{line_number}: #{Avro::Schema.errors.shift}")
43
33
  end
34
+ end
35
+ end
44
36
 
45
- convert_fields!(row, fields_to_convert)
37
+ @writer.flush
38
+ rescue CSV::MalformedCSVError
39
+ @error_writer.puts("line #{line_number}: Unable to parse")
40
+ end
46
41
 
47
- begin
48
- writer.write(row)
49
- writer.flush
50
- rescue
51
- if bad_rows_writer.size == 0
52
- bad_rows_writer << header_row + "\n"
53
- end
42
+ private
54
43
 
55
- bad_rows_writer << line
56
- bad_rows_writer.flush
44
+ def array_delimiter
45
+ @options[:array_delimiter] || ','
46
+ end
57
47
 
58
- until Avro::Schema.errors.empty? do
59
- error_writer << "line #{reader.lineno}: #{Avro::Schema.errors.shift}\n"
60
- end
61
- end
62
- end
63
- end
48
+ def col_sep
49
+ @options[:delimiter] || ','
64
50
  end
65
51
 
66
- private
52
+ def csv_options
53
+ {
54
+ col_sep: col_sep,
55
+ headers: @header,
56
+ header_converters: :aliases,
57
+ skip_blanks: true,
58
+ write_headers: true
59
+ }
60
+ end
67
61
 
68
- def convert_fields!(row, fields_to_convert)
69
- fields_to_convert.each do |key, value|
70
- row[key] = begin
71
- case value
72
- when :int
73
- Integer(row[key])
74
- when :float, :double
75
- Float(row[key])
76
- when :boolean
77
- parse_boolean(row[key])
78
- when :array
79
- parse_array(row[key])
80
- when :enum
81
- row[key].downcase.tr(" ", "_")
82
- end
83
- rescue
84
- row[key]
85
- end
62
+ def csv
63
+ # Initialize header converter
64
+ CSV::HeaderConverters[:aliases] = lambda do |header|
65
+ @schema.aliases[header] || header
86
66
  end
87
67
 
88
- row
68
+ @csv ||= CSV.new(@reader, csv_options)
89
69
  end
90
70
 
91
- def parse_boolean(value)
92
- return true if value == true || value =~ (/^(true|t|yes|y|1)$/i)
93
- return false if value == false || value =~ (/^(false|f|no|n|0)$/i)
94
- nil
71
+ def bad_rows_csv
72
+ options = csv_options.tap { |hash| hash.delete(:header_converters) }
73
+ @bad_rows_csv ||= CSV.new(@bad_rows_writer, options)
95
74
  end
96
75
 
97
- def parse_array(value)
98
- delimiter = converter_options[:array_delimiter] || ','
99
-
100
- value.split(delimiter) if value
76
+ def line_number
77
+ @reader.lineno + 1
101
78
  end
102
79
 
103
- def add_defaults_to_row!(row, defaults)
104
- # Add default values to nil cells
105
- row.each do |key, value|
106
- row[key] = defaults[key] if value.nil?
80
+ def add_defaults_to_hash!(hash)
81
+ # Add default values to empty/missing fields
82
+ @schema.defaults.each do |key, value|
83
+ hash[key] = @schema.defaults[key] if hash[key].nil? or !hash.has_key?(key)
107
84
  end
85
+ end
108
86
 
109
- # Add default values to missing columns
110
- defaults.each do |key, value|
111
- row[key] = defaults[key] unless row.has_key?(key)
87
+ def convert_fields!(hash)
88
+ @schema.types.each do |key, value|
89
+ hash[key] = begin
90
+ case value
91
+ when :int
92
+ Integer(hash[key])
93
+ when :float, :double
94
+ Float(hash[key])
95
+ when :boolean
96
+ parse_boolean(hash[key])
97
+ when :array
98
+ parse_array(hash[key])
99
+ when :enum
100
+ hash[key].downcase.tr(" ", "_")
101
+ else
102
+ hash[key]
103
+ end
104
+ rescue
105
+ hash[key]
106
+ end
112
107
  end
113
-
114
- row
115
108
  end
116
109
 
117
- def init_header_converter
118
- aliases = schema.aliases
119
-
120
- CSV::HeaderConverters[:aliases] = lambda do |header|
121
- aliases[header] || header
110
+ def parse_boolean(value)
111
+ case
112
+ when value == true || value =~ (/^(true|t|yes|y|1)$/i) then true
113
+ when value == false || value =~ (/^(false|f|no|n|0)$/i) then false
114
+ else
115
+ nil
122
116
  end
123
117
  end
118
+
119
+ def parse_array(value)
120
+ value.split(array_delimiter) if value
121
+ end
124
122
  end
125
123
  end
@@ -1,3 +1,3 @@
1
1
  class CSV2Avro
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
data/lib/csv2avro.rb CHANGED
@@ -17,12 +17,7 @@ class CSV2Avro
17
17
  Converter.new(reader, writer, bad_rows_writer, error_writer, options, schema: schema).convert
18
18
  ensure
19
19
  writer.close if writer
20
-
21
- if bad_rows_writer.size == 0
22
- File.delete(bad_rows_uri)
23
- elsif bad_rows_writer
24
- bad_rows_writer.close
25
- end
20
+ bad_rows_writer.close
26
21
  end
27
22
 
28
23
  private
@@ -34,7 +29,7 @@ class CSV2Avro
34
29
  end
35
30
 
36
31
  def reader
37
- ARGF.lineno = 0
32
+ ARGF.lineno = -1
38
33
  ARGF
39
34
  end
40
35
 
@@ -1,9 +1,15 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe CSV2Avro::Converter do
4
- describe '#read' do
4
+ describe '#convert' do
5
+ let(:schema) { CSV2Avro::Schema.new(schema_reader) }
6
+ let(:writer) { StringIO.new }
7
+ let(:avro_writer) { CSV2Avro::AvroWriter.new(writer, schema) }
8
+ let(:bad_rows_writer) { StringIO.new }
9
+ let(:error_writer) { StringIO.new }
10
+
5
11
  context 'schema with string and integer columns' do
6
- let(:schema_io) do
12
+ let(:schema_reader) do
7
13
  StringIO.new(
8
14
  {
9
15
  name: 'categories',
@@ -20,7 +26,7 @@ RSpec.describe CSV2Avro::Converter do
20
26
  context 'separated with commas (csv)' do
21
27
  let(:reader) do
22
28
  StringIO.new(
23
- csv_string = CSV.generate do |csv|
29
+ CSV.generate do |csv|
24
30
  csv << %w[id name description]
25
31
  csv << %w[1 dresses Dresses]
26
32
  csv << %w[2 female-tops]
@@ -28,28 +34,20 @@ RSpec.describe CSV2Avro::Converter do
28
34
  )
29
35
  end
30
36
 
31
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
32
-
33
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
34
-
35
- let(:bad_rows_writer) { StringIO.new }
36
-
37
- let(:error_writer) { StringIO.new }
38
-
39
37
  before do
40
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, {}, schema: schema).convert
38
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
41
39
  end
42
40
 
43
41
  it 'should not have any bad rows' do
44
- expect(bad_rows_writer.read).to eq("")
42
+ expect(bad_rows_writer.read).to be_empty
45
43
  end
46
44
 
47
45
  it 'should not have any errors' do
48
- expect(error_writer.read).to eq("")
46
+ expect(error_writer.read).to be_empty
49
47
  end
50
48
 
51
49
  it 'should store the data with the given schema' do
52
- expect(AvroReader.new(writer).read).to eq(
50
+ expect(AvroReader.new(avro_writer).read).to eq(
53
51
  [
54
52
  { 'id'=>1, 'name'=>'dresses', 'description'=>'Dresses' },
55
53
  { 'id'=>2, 'name'=>'female-tops', 'description'=>nil }
@@ -61,7 +59,7 @@ RSpec.describe CSV2Avro::Converter do
61
59
  context 'separated with tabs (tsv)' do
62
60
  let(:reader) do
63
61
  StringIO.new(
64
- csv_string = CSV.generate({col_sep: "\t"}) do |csv|
62
+ CSV.generate({col_sep: "\t"}) do |csv|
65
63
  csv << %w[id name description]
66
64
  csv << %w[1 dresses Dresses]
67
65
  csv << %w[2 female-tops]
@@ -69,24 +67,16 @@ RSpec.describe CSV2Avro::Converter do
69
67
  )
70
68
  end
71
69
 
72
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
73
-
74
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
75
-
76
- let(:bad_rows_writer) { StringIO.new }
77
-
78
- let(:error_writer) { StringIO.new }
79
-
80
70
  before do
81
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
71
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
82
72
  end
83
73
 
84
74
  it 'should not have any bad rows' do
85
- expect(bad_rows_writer.read).to eq("")
75
+ expect(bad_rows_writer.read).to be_empty
86
76
  end
87
77
 
88
78
  it 'should not have any errors' do
89
- expect(error_writer.read).to eq("")
79
+ expect(error_writer.read).to be_empty
90
80
  end
91
81
 
92
82
  it 'should store the data with the given schema' do
@@ -101,7 +91,7 @@ RSpec.describe CSV2Avro::Converter do
101
91
  end
102
92
 
103
93
  context 'schema with boolean and array columns' do
104
- let(:schema_io) do
94
+ let(:schema_reader) do
105
95
  StringIO.new(
106
96
  {
107
97
  name: 'categories',
@@ -118,7 +108,7 @@ RSpec.describe CSV2Avro::Converter do
118
108
  context 'separated with commas (default)' do
119
109
  let(:reader) do
120
110
  StringIO.new(
121
- csv_string = CSV.generate({col_sep: "\t"}) do |csv|
111
+ CSV.generate do |csv|
122
112
  csv << %w[id enabled image_links]
123
113
  csv << %w[1 true http://www.images.com/dresses.jpeg]
124
114
  csv << %w[2 false http://www.images.com/bras1.jpeg,http://www.images.com/bras2.jpeg]
@@ -126,24 +116,16 @@ RSpec.describe CSV2Avro::Converter do
126
116
  )
127
117
  end
128
118
 
129
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
130
-
131
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
132
-
133
- let(:bad_rows_writer) { StringIO.new }
134
-
135
- let(:error_writer) { StringIO.new }
136
-
137
119
  before do
138
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
120
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
139
121
  end
140
122
 
141
123
  it 'should not have any bad rows' do
142
- expect(bad_rows_writer.read).to eq("")
124
+ expect(bad_rows_writer.read).to be_empty
143
125
  end
144
126
 
145
127
  it 'should not have any errors' do
146
- expect(error_writer.read).to eq("")
128
+ expect(error_writer.read).to be_empty
147
129
  end
148
130
 
149
131
  it 'should store the data with the given schema' do
@@ -159,7 +141,7 @@ RSpec.describe CSV2Avro::Converter do
159
141
  context 'separated with semicolons' do
160
142
  let(:reader) do
161
143
  StringIO.new(
162
- csv_string = CSV.generate({col_sep: "\t"}) do |csv|
144
+ CSV.generate({col_sep: "\t"}) do |csv|
163
145
  csv << %w[id enabled image_links]
164
146
  csv << %w[1 true http://www.images.com/dresses.jpeg]
165
147
  csv << %w[2 false http://www.images.com/bras1.jpeg;http://www.images.com/bras2.jpeg]
@@ -167,24 +149,16 @@ RSpec.describe CSV2Avro::Converter do
167
149
  )
168
150
  end
169
151
 
170
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
171
-
172
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
173
-
174
- let(:bad_rows_writer) { StringIO.new }
175
-
176
- let(:error_writer) { StringIO.new }
177
-
178
152
  before do
179
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
153
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
180
154
  end
181
155
 
182
156
  it 'should not have any bad rows' do
183
- expect(bad_rows_writer.read).to eq("")
157
+ expect(bad_rows_writer.read).to be_empty
184
158
  end
185
159
 
186
160
  it 'should not have any errors' do
187
- expect(error_writer.read).to eq("")
161
+ expect(error_writer.read).to be_empty
188
162
  end
189
163
 
190
164
  it 'should store the data with the given schema' do
@@ -198,8 +172,8 @@ RSpec.describe CSV2Avro::Converter do
198
172
  end
199
173
  end
200
174
 
201
- context 'shema with default vaules' do
202
- let(:schema_io) do
175
+ context 'schema with default vaules' do
176
+ let(:schema_reader) do
203
177
  StringIO.new(
204
178
  {
205
179
  name: 'product',
@@ -216,7 +190,7 @@ RSpec.describe CSV2Avro::Converter do
216
190
 
217
191
  let(:reader) do
218
192
  StringIO.new(
219
- csv_string = CSV.generate do |csv|
193
+ CSV.generate do |csv|
220
194
  csv << %w[id category enabled]
221
195
  csv << %w[1 dresses true]
222
196
  csv << %w[2 ]
@@ -224,24 +198,16 @@ RSpec.describe CSV2Avro::Converter do
224
198
  )
225
199
  end
226
200
 
227
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
228
-
229
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
230
-
231
- let(:bad_rows_writer) { StringIO.new }
232
-
233
- let(:error_writer) { StringIO.new }
234
-
235
201
  before do
236
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
202
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
237
203
  end
238
204
 
239
205
  it 'should not have any bad rows' do
240
- expect(bad_rows_writer.read).to eq("")
206
+ expect(bad_rows_writer.read).to be_empty
241
207
  end
242
208
 
243
209
  it 'should not have any errors' do
244
- expect(error_writer.read).to eq("")
210
+ expect(error_writer.read).to be_empty
245
211
  end
246
212
 
247
213
  it 'should store the defaults data' do
@@ -257,7 +223,7 @@ RSpec.describe CSV2Avro::Converter do
257
223
  context 'schema with aliased fields' do
258
224
  let(:reader) do
259
225
  StringIO.new(
260
- csv_string = CSV.generate do |csv|
226
+ CSV.generate do |csv|
261
227
  csv << %w[id color_id]
262
228
  csv << %w[1 1_red]
263
229
  csv << %w[2 2_blue]
@@ -265,7 +231,7 @@ RSpec.describe CSV2Avro::Converter do
265
231
  )
266
232
  end
267
233
 
268
- let(:schema_io) do
234
+ let(:schema_reader) do
269
235
  StringIO.new(
270
236
  {
271
237
  name: 'product',
@@ -278,24 +244,16 @@ RSpec.describe CSV2Avro::Converter do
278
244
  )
279
245
  end
280
246
 
281
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
282
-
283
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
284
-
285
- let(:bad_rows_writer) { StringIO.new }
286
-
287
- let(:error_writer) { StringIO.new }
288
-
289
247
  before do
290
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, {}, schema: schema).convert
248
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
291
249
  end
292
250
 
293
251
  it 'should not have any bad rows' do
294
- expect(bad_rows_writer.read).to eq("")
252
+ expect(bad_rows_writer.read).to be_empty
295
253
  end
296
254
 
297
255
  it 'should not have any errors' do
298
- expect(error_writer.read).to eq("")
256
+ expect(error_writer.read).to be_empty
299
257
  end
300
258
 
301
259
  it 'should store the data with the given schema' do
@@ -309,7 +267,7 @@ RSpec.describe CSV2Avro::Converter do
309
267
  end
310
268
 
311
269
  context 'schema with enum column' do
312
- let(:schema_io) do
270
+ let(:schema_reader) do
313
271
  StringIO.new(
314
272
  {
315
273
  name: 'product',
@@ -330,7 +288,7 @@ RSpec.describe CSV2Avro::Converter do
330
288
 
331
289
  let(:reader) do
332
290
  StringIO.new(
333
- csv_string = CSV.generate do |csv|
291
+ CSV.generate do |csv|
334
292
  csv << %w[id size_type]
335
293
  csv << %w[1 regular]
336
294
  csv << %W[2 big\sand\stall]
@@ -339,24 +297,16 @@ RSpec.describe CSV2Avro::Converter do
339
297
  )
340
298
  end
341
299
 
342
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
343
-
344
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
345
-
346
- let(:bad_rows_writer) { StringIO.new }
347
-
348
- let(:error_writer) { StringIO.new }
349
-
350
300
  before do
351
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
301
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
352
302
  end
353
303
 
354
304
  it 'should not have any bad rows' do
355
- expect(bad_rows_writer.read).to eq("")
305
+ expect(bad_rows_writer.read).to be_empty
356
306
  end
357
307
 
358
308
  it 'should not have any errors' do
359
- expect(error_writer.read).to eq("")
309
+ expect(error_writer.read).to be_empty
360
310
  end
361
311
 
362
312
  it 'should store the data with the given schema' do
@@ -371,7 +321,7 @@ RSpec.describe CSV2Avro::Converter do
371
321
  end
372
322
 
373
323
  context 'data with bad rows' do
374
- let(:schema_io) do
324
+ let(:schema_reader) do
375
325
  StringIO.new(
376
326
  {
377
327
  name: 'categories',
@@ -387,7 +337,7 @@ RSpec.describe CSV2Avro::Converter do
387
337
 
388
338
  let(:reader) do
389
339
  StringIO.new(
390
- csv_string = CSV.generate({col_sep: "\t"}) do |csv|
340
+ CSV.generate({col_sep: "\t"}) do |csv|
391
341
  csv << %w[id title description]
392
342
  csv << ['1', nil, 'dresses']
393
343
  csv << %w[2 female-tops]
@@ -397,16 +347,8 @@ RSpec.describe CSV2Avro::Converter do
397
347
  )
398
348
  end
399
349
 
400
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
401
-
402
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
403
-
404
- let(:bad_rows_writer) { StringIO.new }
405
-
406
- let(:error_writer) { StringIO.new }
407
-
408
350
  before do
409
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
351
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
410
352
  end
411
353
 
412
354
  it 'should have the bad data in the original form' do
@@ -2,20 +2,15 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe CSV2Avro do
4
4
  describe '#convert' do
5
- let(:options) do
6
- {
7
- schema: './spec/support/schema.avsc'
8
- }
9
- end
5
+ let(:options) { { schema: './spec/support/schema.avsc' } }
10
6
 
11
- subject(:converter) do
7
+ before do
12
8
  ARGV.replace ['./spec/support/data.csv']
13
-
14
- CSV2Avro.new(options)
15
9
  end
10
+ subject(:converter) { CSV2Avro.new(options) }
16
11
 
17
- it 'should write the problems to STDERR' do
18
- expect { converter.convert }.to output("line 4: Missing value at name\n").to_stderr
12
+ it 'should write errors to STDERR' do
13
+ expect { converter.convert }.to output("line 4: Missing value at name\nline 5: Unable to parse\n").to_stderr
19
14
  end
20
15
 
21
16
  it 'should have a bad row' do
@@ -1,4 +1,5 @@
1
1
  id,name,description
2
- 1,dresses,Dresses
2
+ 1,dresses,"Dresses"
3
3
  2,female-tops,
4
- 3,,Bras
4
+ 3,,"Bras"
5
+ 4,male-tops,"Male Tops""
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv2avro
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Ableda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-11 00:00:00.000000000 Z
11
+ date: 2015-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -102,11 +102,9 @@ executables:
102
102
  extensions: []
103
103
  extra_rdoc_files: []
104
104
  files:
105
- - ".dockerignore"
106
105
  - ".gitignore"
107
106
  - ".travis.yml"
108
107
  - CHANGELOG.md
109
- - Dockerfile
110
108
  - Gemfile
111
109
  - LICENSE.txt
112
110
  - README.md
data/.dockerignore DELETED
@@ -1 +0,0 @@
1
- .git
data/Dockerfile DELETED
@@ -1,23 +0,0 @@
1
- FROM ruby:2.1
2
- MAINTAINER Secret Sauce Partners, Inc. <dev@sspinc.io>
3
-
4
- RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
5
- python2.7 get-pip.py && \
6
- pip install awscli
7
-
8
- # throw errors if Gemfile has been modified since Gemfile.lock
9
- RUN bundle config --global frozen 1
10
-
11
- RUN mkdir -p /srv/csv2avro
12
- WORKDIR /srv/csv2avro
13
-
14
- RUN mkdir -p /srv/csv2avro/lib/csv2avro
15
-
16
- COPY lib/csv2avro/version.rb /srv/csv2avro/lib/csv2avro/version.rb
17
- COPY csv2avro.gemspec Gemfile Gemfile.lock /srv/csv2avro/
18
-
19
- RUN bundle install
20
-
21
- COPY . /srv/csv2avro
22
-
23
- ENTRYPOINT ["./bin/csv2avro"]