csv2avro 1.0.0 → 1.0.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6eae97d5b2bf7476331128770ffee4d3b6d69d7a
4
- data.tar.gz: 554e64338b5950de37ccaad44927176f6922f94c
3
+ metadata.gz: 4102328d73046f50036d1a35848142d0d23e50dc
4
+ data.tar.gz: d7f59e943fd02106579e9f25b6bc5480e5243b03
5
5
  SHA512:
6
- metadata.gz: 3a70f269a7337d6dad0bd24528e9092b897217254610a8257db0fecc72d55dada505c68040b3a1309b3e8266473257f118a88231a54b5627c74ffb63c998d49c
7
- data.tar.gz: 01cc32197d34410522aed53d4682aa9c91b20a63ad9e09a80831cc7e6af6d5bfd1a972488bb7b52f263451222a0363f10e5ab8a07e09ab38a5154b46496ff93e
6
+ metadata.gz: e1e6e26a916c6beb65c25395e062fa8f45037171fcfc624798fb68417f257ac761fdb6ffd560dda251bee89953be59399f3e66ee0ce612e405cf352862cd4d30
7
+ data.tar.gz: e94c31792b3113edb64dfc1e72cb24868af3a446469df2ada734d6ef0210992c5739ad26ba485832fcfc37204193d1bbde59155df98a18c34b644cf65aac50f6
data/.gitignore CHANGED
@@ -4,7 +4,9 @@
4
4
  /_yardoc/
5
5
  /coverage/
6
6
  /doc/
7
+ /feeds/
7
8
  /pkg/
9
+ /schema/
8
10
  /spec/reports/
9
11
  /tmp/
10
12
  *.bundle
data/CHANGELOG.md CHANGED
@@ -3,6 +3,11 @@
3
3
  All notable changes to this project are documented in this file.
4
4
  This project adheres to [Semantic Versioning](http://semver.org/).
5
5
 
6
+ ## 1.0.1 (2015-06-12; [compare](https://github.com/sspinc/csv2avro/compare/1.0.0...1.0.1))
7
+
8
+ ### Fixed
9
+ * CSV parsing issues
10
+
6
11
  ## 1.0.0 (2015-06-05; [compare](https://github.com/sspinc/csv2avro/compare/0.4.0...1.0.0))
7
12
 
8
13
  ### Added
data/README.md CHANGED
@@ -8,12 +8,6 @@ Convert CSV files to Avro like a boss.
8
8
 
9
9
  or if you prefer to live on the edge, just clone this repository and build it from scratch.
10
10
 
11
- You can run the converter within a **Docker** container, you just need to pull the `sspinc/csv2avro` image.
12
-
13
- ```
14
- $ docker pull sspinc/csv2avro
15
- ```
16
-
17
11
  ## Usage
18
12
 
19
13
  ### Basic
@@ -24,12 +18,6 @@ This will process the data.csv file and creates a *data.avro* file and a *data.b
24
18
 
25
19
  You can override the bad-rows file location with the `--bad-rows [BAD_ROWS]` option.
26
20
 
27
- ### CSV2Avro in Docker
28
-
29
- ```
30
- $ docker run sspinc/csv2avro --help
31
- ```
32
-
33
21
  ### Streaming
34
22
  ```
35
23
  $ cat ./spec/support/data.csv | csv2avro --schema ./spec/support/schema.avsc --bad-rows ./spec/support/data.bad.csv > ./spec/support/data.avro
@@ -59,7 +47,7 @@ This will uncompress the file and converts it to avro, leaving the original file
59
47
  For a full list of available options, run `csv2avro --help`
60
48
  ```
61
49
  $ csv2avro --help
62
- Version 1.0.0 of CSV2Avro
50
+ Version 1.0.1 of CSV2Avro
63
51
  Usage: csv2avro [options] [file]
64
52
  -s, --schema SCHEMA A file containing the Avro schema. This value is required.
65
53
  -b, --bad-rows [BAD_ROWS] The output location of the bad rows file.
data/Rakefile CHANGED
@@ -16,26 +16,3 @@ RSpec::Core::RakeTask.new(:spec) do |task|
16
16
  end
17
17
 
18
18
  task :default => :spec
19
-
20
- namespace :docker do
21
- desc "Build docker image"
22
- task :build do
23
- sh "docker build -t sspinc/csv2avro:#{CSV2Avro::VERSION} ."
24
- minor_version = CSV2Avro::VERSION.sub(/\.[0-9]+$/, '')
25
- sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:#{minor_version}"
26
- major_version = minor_version.sub(/\.[0-9]+$/, '')
27
- sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:#{major_version}"
28
-
29
- sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:latest"
30
- end
31
-
32
- desc "Run specs inside docker image"
33
- task :spec => :build do
34
- sh "docker run -t --entrypoint=rake sspinc/csv2avro:#{CSV2Avro::VERSION} spec"
35
- end
36
-
37
- desc "Push docker image"
38
- task :push => :spec do
39
- sh "docker push sspinc/csv2avro"
40
- end
41
- end
data/csv2avro.gemspec CHANGED
@@ -24,5 +24,5 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "pry", "~> 0.10"
25
25
  spec.add_development_dependency "bump", "~> 0.5"
26
26
 
27
- spec.add_dependency "avro", "~> 1.7"
27
+ spec.add_runtime_dependency "avro", "~> 1.7"
28
28
  end
data/lib/avro_schema.rb CHANGED
@@ -47,7 +47,7 @@ module Avro
47
47
  if datum.nil? && expected_type != :null
48
48
  @errors << "Missing value at #{name}"
49
49
  else
50
- @errors << "'#{datum}' at #{name} does'n match the type '#{expected_schema.to_s}'"
50
+ @errors << "'#{datum}' at #{name} doesn't match the type '#{expected_schema.to_s}'"
51
51
  end
52
52
  end
53
53
 
@@ -17,11 +17,11 @@ class CSV2Avro
17
17
  end
18
18
 
19
19
  def writer_schema
20
- avro_writer.datum_writer.writers_schema
20
+ @avro_writer.datum_writer.writers_schema
21
21
  end
22
22
 
23
23
  def write(hash)
24
- avro_writer << hash
24
+ @avro_writer << hash
25
25
  end
26
26
  end
27
27
  end
@@ -4,122 +4,120 @@ require 'csv'
4
4
 
5
5
  class CSV2Avro
6
6
  class Converter
7
- attr_reader :writer, :bad_rows_writer, :error_writer, :schema, :reader, :csv_options, :converter_options, :header_row, :column_separator
8
-
9
7
  def initialize(reader, writer, bad_rows_writer, error_writer, options, schema: schema)
8
+ @reader = reader
10
9
  @writer = writer
11
10
  @bad_rows_writer = bad_rows_writer
12
11
  @error_writer = error_writer
12
+ @options = options
13
13
  @schema = schema
14
14
 
15
- @column_separator = options[:delimiter] || ','
16
-
17
- @reader = reader
18
- @header_row = reader.readline.strip
19
- header = header_row.split(column_separator)
20
-
21
- init_header_converter
22
- @csv_options = {
23
- headers: header,
24
- skip_blanks: true,
25
- col_sep: column_separator,
26
- header_converters: :aliases
27
- }
28
-
29
- @converter_options = options
15
+ # read header row explicitly
16
+ @header = @reader.readline.strip.split(col_sep)
30
17
  end
31
18
 
32
19
  def convert
33
- defaults = schema.defaults if converter_options[:write_defaults]
20
+ csv.each do |row|
21
+ hash = row.to_hash
34
22
 
35
- fields_to_convert = schema.types.reject{ |key, value| value == :string }
23
+ add_defaults_to_hash!(hash) if @options[:write_defaults]
24
+ convert_fields!(hash)
36
25
 
37
- reader.each do |line|
38
- CSV.parse(line, csv_options) do |row|
39
- row = row.to_hash
26
+ begin
27
+ @writer.write(hash)
28
+ rescue Avro::IO::AvroTypeError
29
+ bad_rows_csv << row
40
30
 
41
- if converter_options[:write_defaults]
42
- add_defaults_to_row!(row, defaults)
31
+ until Avro::Schema.errors.empty? do
32
+ @error_writer.puts("line #{line_number}: #{Avro::Schema.errors.shift}")
43
33
  end
34
+ end
35
+ end
44
36
 
45
- convert_fields!(row, fields_to_convert)
37
+ @writer.flush
38
+ rescue CSV::MalformedCSVError
39
+ @error_writer.puts("line #{line_number}: Unable to parse")
40
+ end
46
41
 
47
- begin
48
- writer.write(row)
49
- writer.flush
50
- rescue
51
- if bad_rows_writer.size == 0
52
- bad_rows_writer << header_row + "\n"
53
- end
42
+ private
54
43
 
55
- bad_rows_writer << line
56
- bad_rows_writer.flush
44
+ def array_delimiter
45
+ @options[:array_delimiter] || ','
46
+ end
57
47
 
58
- until Avro::Schema.errors.empty? do
59
- error_writer << "line #{reader.lineno}: #{Avro::Schema.errors.shift}\n"
60
- end
61
- end
62
- end
63
- end
48
+ def col_sep
49
+ @options[:delimiter] || ','
64
50
  end
65
51
 
66
- private
52
+ def csv_options
53
+ {
54
+ col_sep: col_sep,
55
+ headers: @header,
56
+ header_converters: :aliases,
57
+ skip_blanks: true,
58
+ write_headers: true
59
+ }
60
+ end
67
61
 
68
- def convert_fields!(row, fields_to_convert)
69
- fields_to_convert.each do |key, value|
70
- row[key] = begin
71
- case value
72
- when :int
73
- Integer(row[key])
74
- when :float, :double
75
- Float(row[key])
76
- when :boolean
77
- parse_boolean(row[key])
78
- when :array
79
- parse_array(row[key])
80
- when :enum
81
- row[key].downcase.tr(" ", "_")
82
- end
83
- rescue
84
- row[key]
85
- end
62
+ def csv
63
+ # Initialize header converter
64
+ CSV::HeaderConverters[:aliases] = lambda do |header|
65
+ @schema.aliases[header] || header
86
66
  end
87
67
 
88
- row
68
+ @csv ||= CSV.new(@reader, csv_options)
89
69
  end
90
70
 
91
- def parse_boolean(value)
92
- return true if value == true || value =~ (/^(true|t|yes|y|1)$/i)
93
- return false if value == false || value =~ (/^(false|f|no|n|0)$/i)
94
- nil
71
+ def bad_rows_csv
72
+ options = csv_options.tap { |hash| hash.delete(:header_converters) }
73
+ @bad_rows_csv ||= CSV.new(@bad_rows_writer, options)
95
74
  end
96
75
 
97
- def parse_array(value)
98
- delimiter = converter_options[:array_delimiter] || ','
99
-
100
- value.split(delimiter) if value
76
+ def line_number
77
+ @reader.lineno + 1
101
78
  end
102
79
 
103
- def add_defaults_to_row!(row, defaults)
104
- # Add default values to nil cells
105
- row.each do |key, value|
106
- row[key] = defaults[key] if value.nil?
80
+ def add_defaults_to_hash!(hash)
81
+ # Add default values to empty/missing fields
82
+ @schema.defaults.each do |key, value|
83
+ hash[key] = @schema.defaults[key] if hash[key].nil? or !hash.has_key?(key)
107
84
  end
85
+ end
108
86
 
109
- # Add default values to missing columns
110
- defaults.each do |key, value|
111
- row[key] = defaults[key] unless row.has_key?(key)
87
+ def convert_fields!(hash)
88
+ @schema.types.each do |key, value|
89
+ hash[key] = begin
90
+ case value
91
+ when :int
92
+ Integer(hash[key])
93
+ when :float, :double
94
+ Float(hash[key])
95
+ when :boolean
96
+ parse_boolean(hash[key])
97
+ when :array
98
+ parse_array(hash[key])
99
+ when :enum
100
+ hash[key].downcase.tr(" ", "_")
101
+ else
102
+ hash[key]
103
+ end
104
+ rescue
105
+ hash[key]
106
+ end
112
107
  end
113
-
114
- row
115
108
  end
116
109
 
117
- def init_header_converter
118
- aliases = schema.aliases
119
-
120
- CSV::HeaderConverters[:aliases] = lambda do |header|
121
- aliases[header] || header
110
+ def parse_boolean(value)
111
+ case
112
+ when value == true || value =~ (/^(true|t|yes|y|1)$/i) then true
113
+ when value == false || value =~ (/^(false|f|no|n|0)$/i) then false
114
+ else
115
+ nil
122
116
  end
123
117
  end
118
+
119
+ def parse_array(value)
120
+ value.split(array_delimiter) if value
121
+ end
124
122
  end
125
123
  end
@@ -1,3 +1,3 @@
1
1
  class CSV2Avro
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
data/lib/csv2avro.rb CHANGED
@@ -17,12 +17,7 @@ class CSV2Avro
17
17
  Converter.new(reader, writer, bad_rows_writer, error_writer, options, schema: schema).convert
18
18
  ensure
19
19
  writer.close if writer
20
-
21
- if bad_rows_writer.size == 0
22
- File.delete(bad_rows_uri)
23
- elsif bad_rows_writer
24
- bad_rows_writer.close
25
- end
20
+ bad_rows_writer.close
26
21
  end
27
22
 
28
23
  private
@@ -34,7 +29,7 @@ class CSV2Avro
34
29
  end
35
30
 
36
31
  def reader
37
- ARGF.lineno = 0
32
+ ARGF.lineno = -1
38
33
  ARGF
39
34
  end
40
35
 
@@ -1,9 +1,15 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe CSV2Avro::Converter do
4
- describe '#read' do
4
+ describe '#convert' do
5
+ let(:schema) { CSV2Avro::Schema.new(schema_reader) }
6
+ let(:writer) { StringIO.new }
7
+ let(:avro_writer) { CSV2Avro::AvroWriter.new(writer, schema) }
8
+ let(:bad_rows_writer) { StringIO.new }
9
+ let(:error_writer) { StringIO.new }
10
+
5
11
  context 'schema with string and integer columns' do
6
- let(:schema_io) do
12
+ let(:schema_reader) do
7
13
  StringIO.new(
8
14
  {
9
15
  name: 'categories',
@@ -20,7 +26,7 @@ RSpec.describe CSV2Avro::Converter do
20
26
  context 'separated with commas (csv)' do
21
27
  let(:reader) do
22
28
  StringIO.new(
23
- csv_string = CSV.generate do |csv|
29
+ CSV.generate do |csv|
24
30
  csv << %w[id name description]
25
31
  csv << %w[1 dresses Dresses]
26
32
  csv << %w[2 female-tops]
@@ -28,28 +34,20 @@ RSpec.describe CSV2Avro::Converter do
28
34
  )
29
35
  end
30
36
 
31
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
32
-
33
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
34
-
35
- let(:bad_rows_writer) { StringIO.new }
36
-
37
- let(:error_writer) { StringIO.new }
38
-
39
37
  before do
40
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, {}, schema: schema).convert
38
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
41
39
  end
42
40
 
43
41
  it 'should not have any bad rows' do
44
- expect(bad_rows_writer.read).to eq("")
42
+ expect(bad_rows_writer.read).to be_empty
45
43
  end
46
44
 
47
45
  it 'should not have any errors' do
48
- expect(error_writer.read).to eq("")
46
+ expect(error_writer.read).to be_empty
49
47
  end
50
48
 
51
49
  it 'should store the data with the given schema' do
52
- expect(AvroReader.new(writer).read).to eq(
50
+ expect(AvroReader.new(avro_writer).read).to eq(
53
51
  [
54
52
  { 'id'=>1, 'name'=>'dresses', 'description'=>'Dresses' },
55
53
  { 'id'=>2, 'name'=>'female-tops', 'description'=>nil }
@@ -61,7 +59,7 @@ RSpec.describe CSV2Avro::Converter do
61
59
  context 'separated with tabs (tsv)' do
62
60
  let(:reader) do
63
61
  StringIO.new(
64
- csv_string = CSV.generate({col_sep: "\t"}) do |csv|
62
+ CSV.generate({col_sep: "\t"}) do |csv|
65
63
  csv << %w[id name description]
66
64
  csv << %w[1 dresses Dresses]
67
65
  csv << %w[2 female-tops]
@@ -69,24 +67,16 @@ RSpec.describe CSV2Avro::Converter do
69
67
  )
70
68
  end
71
69
 
72
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
73
-
74
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
75
-
76
- let(:bad_rows_writer) { StringIO.new }
77
-
78
- let(:error_writer) { StringIO.new }
79
-
80
70
  before do
81
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
71
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
82
72
  end
83
73
 
84
74
  it 'should not have any bad rows' do
85
- expect(bad_rows_writer.read).to eq("")
75
+ expect(bad_rows_writer.read).to be_empty
86
76
  end
87
77
 
88
78
  it 'should not have any errors' do
89
- expect(error_writer.read).to eq("")
79
+ expect(error_writer.read).to be_empty
90
80
  end
91
81
 
92
82
  it 'should store the data with the given schema' do
@@ -101,7 +91,7 @@ RSpec.describe CSV2Avro::Converter do
101
91
  end
102
92
 
103
93
  context 'schema with boolean and array columns' do
104
- let(:schema_io) do
94
+ let(:schema_reader) do
105
95
  StringIO.new(
106
96
  {
107
97
  name: 'categories',
@@ -118,7 +108,7 @@ RSpec.describe CSV2Avro::Converter do
118
108
  context 'separated with commas (default)' do
119
109
  let(:reader) do
120
110
  StringIO.new(
121
- csv_string = CSV.generate({col_sep: "\t"}) do |csv|
111
+ CSV.generate do |csv|
122
112
  csv << %w[id enabled image_links]
123
113
  csv << %w[1 true http://www.images.com/dresses.jpeg]
124
114
  csv << %w[2 false http://www.images.com/bras1.jpeg,http://www.images.com/bras2.jpeg]
@@ -126,24 +116,16 @@ RSpec.describe CSV2Avro::Converter do
126
116
  )
127
117
  end
128
118
 
129
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
130
-
131
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
132
-
133
- let(:bad_rows_writer) { StringIO.new }
134
-
135
- let(:error_writer) { StringIO.new }
136
-
137
119
  before do
138
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
120
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
139
121
  end
140
122
 
141
123
  it 'should not have any bad rows' do
142
- expect(bad_rows_writer.read).to eq("")
124
+ expect(bad_rows_writer.read).to be_empty
143
125
  end
144
126
 
145
127
  it 'should not have any errors' do
146
- expect(error_writer.read).to eq("")
128
+ expect(error_writer.read).to be_empty
147
129
  end
148
130
 
149
131
  it 'should store the data with the given schema' do
@@ -159,7 +141,7 @@ RSpec.describe CSV2Avro::Converter do
159
141
  context 'separated with semicolons' do
160
142
  let(:reader) do
161
143
  StringIO.new(
162
- csv_string = CSV.generate({col_sep: "\t"}) do |csv|
144
+ CSV.generate({col_sep: "\t"}) do |csv|
163
145
  csv << %w[id enabled image_links]
164
146
  csv << %w[1 true http://www.images.com/dresses.jpeg]
165
147
  csv << %w[2 false http://www.images.com/bras1.jpeg;http://www.images.com/bras2.jpeg]
@@ -167,24 +149,16 @@ RSpec.describe CSV2Avro::Converter do
167
149
  )
168
150
  end
169
151
 
170
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
171
-
172
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
173
-
174
- let(:bad_rows_writer) { StringIO.new }
175
-
176
- let(:error_writer) { StringIO.new }
177
-
178
152
  before do
179
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
153
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
180
154
  end
181
155
 
182
156
  it 'should not have any bad rows' do
183
- expect(bad_rows_writer.read).to eq("")
157
+ expect(bad_rows_writer.read).to be_empty
184
158
  end
185
159
 
186
160
  it 'should not have any errors' do
187
- expect(error_writer.read).to eq("")
161
+ expect(error_writer.read).to be_empty
188
162
  end
189
163
 
190
164
  it 'should store the data with the given schema' do
@@ -198,8 +172,8 @@ RSpec.describe CSV2Avro::Converter do
198
172
  end
199
173
  end
200
174
 
201
- context 'shema with default vaules' do
202
- let(:schema_io) do
175
+ context 'schema with default vaules' do
176
+ let(:schema_reader) do
203
177
  StringIO.new(
204
178
  {
205
179
  name: 'product',
@@ -216,7 +190,7 @@ RSpec.describe CSV2Avro::Converter do
216
190
 
217
191
  let(:reader) do
218
192
  StringIO.new(
219
- csv_string = CSV.generate do |csv|
193
+ CSV.generate do |csv|
220
194
  csv << %w[id category enabled]
221
195
  csv << %w[1 dresses true]
222
196
  csv << %w[2 ]
@@ -224,24 +198,16 @@ RSpec.describe CSV2Avro::Converter do
224
198
  )
225
199
  end
226
200
 
227
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
228
-
229
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
230
-
231
- let(:bad_rows_writer) { StringIO.new }
232
-
233
- let(:error_writer) { StringIO.new }
234
-
235
201
  before do
236
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
202
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
237
203
  end
238
204
 
239
205
  it 'should not have any bad rows' do
240
- expect(bad_rows_writer.read).to eq("")
206
+ expect(bad_rows_writer.read).to be_empty
241
207
  end
242
208
 
243
209
  it 'should not have any errors' do
244
- expect(error_writer.read).to eq("")
210
+ expect(error_writer.read).to be_empty
245
211
  end
246
212
 
247
213
  it 'should store the defaults data' do
@@ -257,7 +223,7 @@ RSpec.describe CSV2Avro::Converter do
257
223
  context 'schema with aliased fields' do
258
224
  let(:reader) do
259
225
  StringIO.new(
260
- csv_string = CSV.generate do |csv|
226
+ CSV.generate do |csv|
261
227
  csv << %w[id color_id]
262
228
  csv << %w[1 1_red]
263
229
  csv << %w[2 2_blue]
@@ -265,7 +231,7 @@ RSpec.describe CSV2Avro::Converter do
265
231
  )
266
232
  end
267
233
 
268
- let(:schema_io) do
234
+ let(:schema_reader) do
269
235
  StringIO.new(
270
236
  {
271
237
  name: 'product',
@@ -278,24 +244,16 @@ RSpec.describe CSV2Avro::Converter do
278
244
  )
279
245
  end
280
246
 
281
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
282
-
283
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
284
-
285
- let(:bad_rows_writer) { StringIO.new }
286
-
287
- let(:error_writer) { StringIO.new }
288
-
289
247
  before do
290
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, {}, schema: schema).convert
248
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
291
249
  end
292
250
 
293
251
  it 'should not have any bad rows' do
294
- expect(bad_rows_writer.read).to eq("")
252
+ expect(bad_rows_writer.read).to be_empty
295
253
  end
296
254
 
297
255
  it 'should not have any errors' do
298
- expect(error_writer.read).to eq("")
256
+ expect(error_writer.read).to be_empty
299
257
  end
300
258
 
301
259
  it 'should store the data with the given schema' do
@@ -309,7 +267,7 @@ RSpec.describe CSV2Avro::Converter do
309
267
  end
310
268
 
311
269
  context 'schema with enum column' do
312
- let(:schema_io) do
270
+ let(:schema_reader) do
313
271
  StringIO.new(
314
272
  {
315
273
  name: 'product',
@@ -330,7 +288,7 @@ RSpec.describe CSV2Avro::Converter do
330
288
 
331
289
  let(:reader) do
332
290
  StringIO.new(
333
- csv_string = CSV.generate do |csv|
291
+ CSV.generate do |csv|
334
292
  csv << %w[id size_type]
335
293
  csv << %w[1 regular]
336
294
  csv << %W[2 big\sand\stall]
@@ -339,24 +297,16 @@ RSpec.describe CSV2Avro::Converter do
339
297
  )
340
298
  end
341
299
 
342
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
343
-
344
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
345
-
346
- let(:bad_rows_writer) { StringIO.new }
347
-
348
- let(:error_writer) { StringIO.new }
349
-
350
300
  before do
351
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
301
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
352
302
  end
353
303
 
354
304
  it 'should not have any bad rows' do
355
- expect(bad_rows_writer.read).to eq("")
305
+ expect(bad_rows_writer.read).to be_empty
356
306
  end
357
307
 
358
308
  it 'should not have any errors' do
359
- expect(error_writer.read).to eq("")
309
+ expect(error_writer.read).to be_empty
360
310
  end
361
311
 
362
312
  it 'should store the data with the given schema' do
@@ -371,7 +321,7 @@ RSpec.describe CSV2Avro::Converter do
371
321
  end
372
322
 
373
323
  context 'data with bad rows' do
374
- let(:schema_io) do
324
+ let(:schema_reader) do
375
325
  StringIO.new(
376
326
  {
377
327
  name: 'categories',
@@ -387,7 +337,7 @@ RSpec.describe CSV2Avro::Converter do
387
337
 
388
338
  let(:reader) do
389
339
  StringIO.new(
390
- csv_string = CSV.generate({col_sep: "\t"}) do |csv|
340
+ CSV.generate({col_sep: "\t"}) do |csv|
391
341
  csv << %w[id title description]
392
342
  csv << ['1', nil, 'dresses']
393
343
  csv << %w[2 female-tops]
@@ -397,16 +347,8 @@ RSpec.describe CSV2Avro::Converter do
397
347
  )
398
348
  end
399
349
 
400
- let(:schema) { CSV2Avro::Schema.new(schema_io) }
401
-
402
- let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
403
-
404
- let(:bad_rows_writer) { StringIO.new }
405
-
406
- let(:error_writer) { StringIO.new }
407
-
408
350
  before do
409
- CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
351
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
410
352
  end
411
353
 
412
354
  it 'should have the bad data in the original form' do
@@ -2,20 +2,15 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe CSV2Avro do
4
4
  describe '#convert' do
5
- let(:options) do
6
- {
7
- schema: './spec/support/schema.avsc'
8
- }
9
- end
5
+ let(:options) { { schema: './spec/support/schema.avsc' } }
10
6
 
11
- subject(:converter) do
7
+ before do
12
8
  ARGV.replace ['./spec/support/data.csv']
13
-
14
- CSV2Avro.new(options)
15
9
  end
10
+ subject(:converter) { CSV2Avro.new(options) }
16
11
 
17
- it 'should write the problems to STDERR' do
18
- expect { converter.convert }.to output("line 4: Missing value at name\n").to_stderr
12
+ it 'should write errors to STDERR' do
13
+ expect { converter.convert }.to output("line 4: Missing value at name\nline 5: Unable to parse\n").to_stderr
19
14
  end
20
15
 
21
16
  it 'should have a bad row' do
@@ -1,4 +1,5 @@
1
1
  id,name,description
2
- 1,dresses,Dresses
2
+ 1,dresses,"Dresses"
3
3
  2,female-tops,
4
- 3,,Bras
4
+ 3,,"Bras"
5
+ 4,male-tops,"Male Tops""
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv2avro
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Ableda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-11 00:00:00.000000000 Z
11
+ date: 2015-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -102,11 +102,9 @@ executables:
102
102
  extensions: []
103
103
  extra_rdoc_files: []
104
104
  files:
105
- - ".dockerignore"
106
105
  - ".gitignore"
107
106
  - ".travis.yml"
108
107
  - CHANGELOG.md
109
- - Dockerfile
110
108
  - Gemfile
111
109
  - LICENSE.txt
112
110
  - README.md
data/.dockerignore DELETED
@@ -1 +0,0 @@
1
- .git
data/Dockerfile DELETED
@@ -1,23 +0,0 @@
1
- FROM ruby:2.1
2
- MAINTAINER Secret Sauce Partners, Inc. <dev@sspinc.io>
3
-
4
- RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
5
- python2.7 get-pip.py && \
6
- pip install awscli
7
-
8
- # throw errors if Gemfile has been modified since Gemfile.lock
9
- RUN bundle config --global frozen 1
10
-
11
- RUN mkdir -p /srv/csv2avro
12
- WORKDIR /srv/csv2avro
13
-
14
- RUN mkdir -p /srv/csv2avro/lib/csv2avro
15
-
16
- COPY lib/csv2avro/version.rb /srv/csv2avro/lib/csv2avro/version.rb
17
- COPY csv2avro.gemspec Gemfile Gemfile.lock /srv/csv2avro/
18
-
19
- RUN bundle install
20
-
21
- COPY . /srv/csv2avro
22
-
23
- ENTRYPOINT ["./bin/csv2avro"]