csv2avro 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +5 -0
- data/README.md +1 -13
- data/Rakefile +0 -23
- data/csv2avro.gemspec +1 -1
- data/lib/avro_schema.rb +1 -1
- data/lib/csv2avro/avro_writer.rb +2 -2
- data/lib/csv2avro/converter.rb +80 -82
- data/lib/csv2avro/version.rb +1 -1
- data/lib/csv2avro.rb +2 -7
- data/spec/csv2avro/converter_spec.rb +45 -103
- data/spec/csv2avro_spec.rb +5 -10
- data/spec/support/data.csv +3 -2
- metadata +2 -4
- data/.dockerignore +0 -1
- data/Dockerfile +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4102328d73046f50036d1a35848142d0d23e50dc
|
4
|
+
data.tar.gz: d7f59e943fd02106579e9f25b6bc5480e5243b03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e1e6e26a916c6beb65c25395e062fa8f45037171fcfc624798fb68417f257ac761fdb6ffd560dda251bee89953be59399f3e66ee0ce612e405cf352862cd4d30
|
7
|
+
data.tar.gz: e94c31792b3113edb64dfc1e72cb24868af3a446469df2ada734d6ef0210992c5739ad26ba485832fcfc37204193d1bbde59155df98a18c34b644cf65aac50f6
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -3,6 +3,11 @@
|
|
3
3
|
All notable changes to this project are documented in this file.
|
4
4
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
5
5
|
|
6
|
+
## 1.0.1 (2015-06-12; [compare](https://github.com/sspinc/csv2avro/compare/1.0.0...1.0.1))
|
7
|
+
|
8
|
+
### Fixed
|
9
|
+
* CSV parsing issues
|
10
|
+
|
6
11
|
## 1.0.0 (2015-06-05; [compare](https://github.com/sspinc/csv2avro/compare/0.4.0...1.0.0))
|
7
12
|
|
8
13
|
### Added
|
data/README.md
CHANGED
@@ -8,12 +8,6 @@ Convert CSV files to Avro like a boss.
|
|
8
8
|
|
9
9
|
or if you prefer to live on the edge, just clone this repository and build it from scratch.
|
10
10
|
|
11
|
-
You can run the converter within a **Docker** container, you just need to pull the `sspinc/csv2avro` image.
|
12
|
-
|
13
|
-
```
|
14
|
-
$ docker pull sspinc/csv2avro
|
15
|
-
```
|
16
|
-
|
17
11
|
## Usage
|
18
12
|
|
19
13
|
### Basic
|
@@ -24,12 +18,6 @@ This will process the data.csv file and creates a *data.avro* file and a *data.b
|
|
24
18
|
|
25
19
|
You can override the bad-rows file location with the `--bad-rows [BAD_ROWS]` option.
|
26
20
|
|
27
|
-
### CSV2Avro in Docker
|
28
|
-
|
29
|
-
```
|
30
|
-
$ docker run sspinc/csv2avro --help
|
31
|
-
```
|
32
|
-
|
33
21
|
### Streaming
|
34
22
|
```
|
35
23
|
$ cat ./spec/support/data.csv | csv2avro --schema ./spec/support/schema.avsc --bad-rows ./spec/support/data.bad.csv > ./spec/support/data.avro
|
@@ -59,7 +47,7 @@ This will uncompress the file and converts it to avro, leaving the original file
|
|
59
47
|
For a full list of available options, run `csv2avro --help`
|
60
48
|
```
|
61
49
|
$ csv2avro --help
|
62
|
-
Version 1.0.0 of CSV2Avro
|
50
|
+
Version 1.0.1 of CSV2Avro
|
63
51
|
Usage: csv2avro [options] [file]
|
64
52
|
-s, --schema SCHEMA A file containing the Avro schema. This value is required.
|
65
53
|
-b, --bad-rows [BAD_ROWS] The output location of the bad rows file.
|
data/Rakefile
CHANGED
@@ -16,26 +16,3 @@ RSpec::Core::RakeTask.new(:spec) do |task|
|
|
16
16
|
end
|
17
17
|
|
18
18
|
task :default => :spec
|
19
|
-
|
20
|
-
namespace :docker do
|
21
|
-
desc "Build docker image"
|
22
|
-
task :build do
|
23
|
-
sh "docker build -t sspinc/csv2avro:#{CSV2Avro::VERSION} ."
|
24
|
-
minor_version = CSV2Avro::VERSION.sub(/\.[0-9]+$/, '')
|
25
|
-
sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:#{minor_version}"
|
26
|
-
major_version = minor_version.sub(/\.[0-9]+$/, '')
|
27
|
-
sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:#{major_version}"
|
28
|
-
|
29
|
-
sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:latest"
|
30
|
-
end
|
31
|
-
|
32
|
-
desc "Run specs inside docker image"
|
33
|
-
task :spec => :build do
|
34
|
-
sh "docker run -t --entrypoint=rake sspinc/csv2avro:#{CSV2Avro::VERSION} spec"
|
35
|
-
end
|
36
|
-
|
37
|
-
desc "Push docker image"
|
38
|
-
task :push => :spec do
|
39
|
-
sh "docker push sspinc/csv2avro"
|
40
|
-
end
|
41
|
-
end
|
data/csv2avro.gemspec
CHANGED
data/lib/avro_schema.rb
CHANGED
@@ -47,7 +47,7 @@ module Avro
|
|
47
47
|
if datum.nil? && expected_type != :null
|
48
48
|
@errors << "Missing value at #{name}"
|
49
49
|
else
|
50
|
-
@errors << "'#{datum}' at #{name}
|
50
|
+
@errors << "'#{datum}' at #{name} doesn't match the type '#{expected_schema.to_s}'"
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
data/lib/csv2avro/avro_writer.rb
CHANGED
data/lib/csv2avro/converter.rb
CHANGED
@@ -4,122 +4,120 @@ require 'csv'
|
|
4
4
|
|
5
5
|
class CSV2Avro
|
6
6
|
class Converter
|
7
|
-
attr_reader :writer, :bad_rows_writer, :error_writer, :schema, :reader, :csv_options, :converter_options, :header_row, :column_separator
|
8
|
-
|
9
7
|
def initialize(reader, writer, bad_rows_writer, error_writer, options, schema: schema)
|
8
|
+
@reader = reader
|
10
9
|
@writer = writer
|
11
10
|
@bad_rows_writer = bad_rows_writer
|
12
11
|
@error_writer = error_writer
|
12
|
+
@options = options
|
13
13
|
@schema = schema
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
@reader = reader
|
18
|
-
@header_row = reader.readline.strip
|
19
|
-
header = header_row.split(column_separator)
|
20
|
-
|
21
|
-
init_header_converter
|
22
|
-
@csv_options = {
|
23
|
-
headers: header,
|
24
|
-
skip_blanks: true,
|
25
|
-
col_sep: column_separator,
|
26
|
-
header_converters: :aliases
|
27
|
-
}
|
28
|
-
|
29
|
-
@converter_options = options
|
15
|
+
# read header row explicitly
|
16
|
+
@header = @reader.readline.strip.split(col_sep)
|
30
17
|
end
|
31
18
|
|
32
19
|
def convert
|
33
|
-
|
20
|
+
csv.each do |row|
|
21
|
+
hash = row.to_hash
|
34
22
|
|
35
|
-
|
23
|
+
add_defaults_to_hash!(hash) if @options[:write_defaults]
|
24
|
+
convert_fields!(hash)
|
36
25
|
|
37
|
-
|
38
|
-
|
39
|
-
|
26
|
+
begin
|
27
|
+
@writer.write(hash)
|
28
|
+
rescue Avro::IO::AvroTypeError
|
29
|
+
bad_rows_csv << row
|
40
30
|
|
41
|
-
|
42
|
-
|
31
|
+
until Avro::Schema.errors.empty? do
|
32
|
+
@error_writer.puts("line #{line_number}: #{Avro::Schema.errors.shift}")
|
43
33
|
end
|
34
|
+
end
|
35
|
+
end
|
44
36
|
|
45
|
-
|
37
|
+
@writer.flush
|
38
|
+
rescue CSV::MalformedCSVError
|
39
|
+
@error_writer.puts("line #{line_number}: Unable to parse")
|
40
|
+
end
|
46
41
|
|
47
|
-
|
48
|
-
writer.write(row)
|
49
|
-
writer.flush
|
50
|
-
rescue
|
51
|
-
if bad_rows_writer.size == 0
|
52
|
-
bad_rows_writer << header_row + "\n"
|
53
|
-
end
|
42
|
+
private
|
54
43
|
|
55
|
-
|
56
|
-
|
44
|
+
def array_delimiter
|
45
|
+
@options[:array_delimiter] || ','
|
46
|
+
end
|
57
47
|
|
58
|
-
|
59
|
-
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
48
|
+
def col_sep
|
49
|
+
@options[:delimiter] || ','
|
64
50
|
end
|
65
51
|
|
66
|
-
|
52
|
+
def csv_options
|
53
|
+
{
|
54
|
+
col_sep: col_sep,
|
55
|
+
headers: @header,
|
56
|
+
header_converters: :aliases,
|
57
|
+
skip_blanks: true,
|
58
|
+
write_headers: true
|
59
|
+
}
|
60
|
+
end
|
67
61
|
|
68
|
-
def
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
when :int
|
73
|
-
Integer(row[key])
|
74
|
-
when :float, :double
|
75
|
-
Float(row[key])
|
76
|
-
when :boolean
|
77
|
-
parse_boolean(row[key])
|
78
|
-
when :array
|
79
|
-
parse_array(row[key])
|
80
|
-
when :enum
|
81
|
-
row[key].downcase.tr(" ", "_")
|
82
|
-
end
|
83
|
-
rescue
|
84
|
-
row[key]
|
85
|
-
end
|
62
|
+
def csv
|
63
|
+
# Initialize header converter
|
64
|
+
CSV::HeaderConverters[:aliases] = lambda do |header|
|
65
|
+
@schema.aliases[header] || header
|
86
66
|
end
|
87
67
|
|
88
|
-
|
68
|
+
@csv ||= CSV.new(@reader, csv_options)
|
89
69
|
end
|
90
70
|
|
91
|
-
def
|
92
|
-
|
93
|
-
|
94
|
-
nil
|
71
|
+
def bad_rows_csv
|
72
|
+
options = csv_options.tap { |hash| hash.delete(:header_converters) }
|
73
|
+
@bad_rows_csv ||= CSV.new(@bad_rows_writer, options)
|
95
74
|
end
|
96
75
|
|
97
|
-
def
|
98
|
-
|
99
|
-
|
100
|
-
value.split(delimiter) if value
|
76
|
+
def line_number
|
77
|
+
@reader.lineno + 1
|
101
78
|
end
|
102
79
|
|
103
|
-
def
|
104
|
-
# Add default values to
|
105
|
-
|
106
|
-
|
80
|
+
def add_defaults_to_hash!(hash)
|
81
|
+
# Add default values to empty/missing fields
|
82
|
+
@schema.defaults.each do |key, value|
|
83
|
+
hash[key] = @schema.defaults[key] if hash[key].nil? or !hash.has_key?(key)
|
107
84
|
end
|
85
|
+
end
|
108
86
|
|
109
|
-
|
110
|
-
|
111
|
-
|
87
|
+
def convert_fields!(hash)
|
88
|
+
@schema.types.each do |key, value|
|
89
|
+
hash[key] = begin
|
90
|
+
case value
|
91
|
+
when :int
|
92
|
+
Integer(hash[key])
|
93
|
+
when :float, :double
|
94
|
+
Float(hash[key])
|
95
|
+
when :boolean
|
96
|
+
parse_boolean(hash[key])
|
97
|
+
when :array
|
98
|
+
parse_array(hash[key])
|
99
|
+
when :enum
|
100
|
+
hash[key].downcase.tr(" ", "_")
|
101
|
+
else
|
102
|
+
hash[key]
|
103
|
+
end
|
104
|
+
rescue
|
105
|
+
hash[key]
|
106
|
+
end
|
112
107
|
end
|
113
|
-
|
114
|
-
row
|
115
108
|
end
|
116
109
|
|
117
|
-
def
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
110
|
+
def parse_boolean(value)
|
111
|
+
case
|
112
|
+
when value == true || value =~ (/^(true|t|yes|y|1)$/i) then true
|
113
|
+
when value == false || value =~ (/^(false|f|no|n|0)$/i) then false
|
114
|
+
else
|
115
|
+
nil
|
122
116
|
end
|
123
117
|
end
|
118
|
+
|
119
|
+
def parse_array(value)
|
120
|
+
value.split(array_delimiter) if value
|
121
|
+
end
|
124
122
|
end
|
125
123
|
end
|
data/lib/csv2avro/version.rb
CHANGED
data/lib/csv2avro.rb
CHANGED
@@ -17,12 +17,7 @@ class CSV2Avro
|
|
17
17
|
Converter.new(reader, writer, bad_rows_writer, error_writer, options, schema: schema).convert
|
18
18
|
ensure
|
19
19
|
writer.close if writer
|
20
|
-
|
21
|
-
if bad_rows_writer.size == 0
|
22
|
-
File.delete(bad_rows_uri)
|
23
|
-
elsif bad_rows_writer
|
24
|
-
bad_rows_writer.close
|
25
|
-
end
|
20
|
+
bad_rows_writer.close
|
26
21
|
end
|
27
22
|
|
28
23
|
private
|
@@ -34,7 +29,7 @@ class CSV2Avro
|
|
34
29
|
end
|
35
30
|
|
36
31
|
def reader
|
37
|
-
ARGF.lineno =
|
32
|
+
ARGF.lineno = -1
|
38
33
|
ARGF
|
39
34
|
end
|
40
35
|
|
@@ -1,9 +1,15 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe CSV2Avro::Converter do
|
4
|
-
describe '#
|
4
|
+
describe '#convert' do
|
5
|
+
let(:schema) { CSV2Avro::Schema.new(schema_reader) }
|
6
|
+
let(:writer) { StringIO.new }
|
7
|
+
let(:avro_writer) { CSV2Avro::AvroWriter.new(writer, schema) }
|
8
|
+
let(:bad_rows_writer) { StringIO.new }
|
9
|
+
let(:error_writer) { StringIO.new }
|
10
|
+
|
5
11
|
context 'schema with string and integer columns' do
|
6
|
-
let(:
|
12
|
+
let(:schema_reader) do
|
7
13
|
StringIO.new(
|
8
14
|
{
|
9
15
|
name: 'categories',
|
@@ -20,7 +26,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
20
26
|
context 'separated with commas (csv)' do
|
21
27
|
let(:reader) do
|
22
28
|
StringIO.new(
|
23
|
-
|
29
|
+
CSV.generate do |csv|
|
24
30
|
csv << %w[id name description]
|
25
31
|
csv << %w[1 dresses Dresses]
|
26
32
|
csv << %w[2 female-tops]
|
@@ -28,28 +34,20 @@ RSpec.describe CSV2Avro::Converter do
|
|
28
34
|
)
|
29
35
|
end
|
30
36
|
|
31
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
32
|
-
|
33
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
34
|
-
|
35
|
-
let(:bad_rows_writer) { StringIO.new }
|
36
|
-
|
37
|
-
let(:error_writer) { StringIO.new }
|
38
|
-
|
39
37
|
before do
|
40
|
-
CSV2Avro::Converter.new(reader,
|
38
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
|
41
39
|
end
|
42
40
|
|
43
41
|
it 'should not have any bad rows' do
|
44
|
-
expect(bad_rows_writer.read).to
|
42
|
+
expect(bad_rows_writer.read).to be_empty
|
45
43
|
end
|
46
44
|
|
47
45
|
it 'should not have any errors' do
|
48
|
-
expect(error_writer.read).to
|
46
|
+
expect(error_writer.read).to be_empty
|
49
47
|
end
|
50
48
|
|
51
49
|
it 'should store the data with the given schema' do
|
52
|
-
expect(AvroReader.new(
|
50
|
+
expect(AvroReader.new(avro_writer).read).to eq(
|
53
51
|
[
|
54
52
|
{ 'id'=>1, 'name'=>'dresses', 'description'=>'Dresses' },
|
55
53
|
{ 'id'=>2, 'name'=>'female-tops', 'description'=>nil }
|
@@ -61,7 +59,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
61
59
|
context 'separated with tabs (tsv)' do
|
62
60
|
let(:reader) do
|
63
61
|
StringIO.new(
|
64
|
-
|
62
|
+
CSV.generate({col_sep: "\t"}) do |csv|
|
65
63
|
csv << %w[id name description]
|
66
64
|
csv << %w[1 dresses Dresses]
|
67
65
|
csv << %w[2 female-tops]
|
@@ -69,24 +67,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
69
67
|
)
|
70
68
|
end
|
71
69
|
|
72
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
73
|
-
|
74
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
75
|
-
|
76
|
-
let(:bad_rows_writer) { StringIO.new }
|
77
|
-
|
78
|
-
let(:error_writer) { StringIO.new }
|
79
|
-
|
80
70
|
before do
|
81
|
-
CSV2Avro::Converter.new(reader,
|
71
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
|
82
72
|
end
|
83
73
|
|
84
74
|
it 'should not have any bad rows' do
|
85
|
-
expect(bad_rows_writer.read).to
|
75
|
+
expect(bad_rows_writer.read).to be_empty
|
86
76
|
end
|
87
77
|
|
88
78
|
it 'should not have any errors' do
|
89
|
-
expect(error_writer.read).to
|
79
|
+
expect(error_writer.read).to be_empty
|
90
80
|
end
|
91
81
|
|
92
82
|
it 'should store the data with the given schema' do
|
@@ -101,7 +91,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
101
91
|
end
|
102
92
|
|
103
93
|
context 'schema with boolean and array columns' do
|
104
|
-
let(:
|
94
|
+
let(:schema_reader) do
|
105
95
|
StringIO.new(
|
106
96
|
{
|
107
97
|
name: 'categories',
|
@@ -118,7 +108,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
118
108
|
context 'separated with commas (default)' do
|
119
109
|
let(:reader) do
|
120
110
|
StringIO.new(
|
121
|
-
|
111
|
+
CSV.generate do |csv|
|
122
112
|
csv << %w[id enabled image_links]
|
123
113
|
csv << %w[1 true http://www.images.com/dresses.jpeg]
|
124
114
|
csv << %w[2 false http://www.images.com/bras1.jpeg,http://www.images.com/bras2.jpeg]
|
@@ -126,24 +116,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
126
116
|
)
|
127
117
|
end
|
128
118
|
|
129
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
130
|
-
|
131
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
132
|
-
|
133
|
-
let(:bad_rows_writer) { StringIO.new }
|
134
|
-
|
135
|
-
let(:error_writer) { StringIO.new }
|
136
|
-
|
137
119
|
before do
|
138
|
-
CSV2Avro::Converter.new(reader,
|
120
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
|
139
121
|
end
|
140
122
|
|
141
123
|
it 'should not have any bad rows' do
|
142
|
-
expect(bad_rows_writer.read).to
|
124
|
+
expect(bad_rows_writer.read).to be_empty
|
143
125
|
end
|
144
126
|
|
145
127
|
it 'should not have any errors' do
|
146
|
-
expect(error_writer.read).to
|
128
|
+
expect(error_writer.read).to be_empty
|
147
129
|
end
|
148
130
|
|
149
131
|
it 'should store the data with the given schema' do
|
@@ -159,7 +141,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
159
141
|
context 'separated with semicolons' do
|
160
142
|
let(:reader) do
|
161
143
|
StringIO.new(
|
162
|
-
|
144
|
+
CSV.generate({col_sep: "\t"}) do |csv|
|
163
145
|
csv << %w[id enabled image_links]
|
164
146
|
csv << %w[1 true http://www.images.com/dresses.jpeg]
|
165
147
|
csv << %w[2 false http://www.images.com/bras1.jpeg;http://www.images.com/bras2.jpeg]
|
@@ -167,24 +149,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
167
149
|
)
|
168
150
|
end
|
169
151
|
|
170
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
171
|
-
|
172
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
173
|
-
|
174
|
-
let(:bad_rows_writer) { StringIO.new }
|
175
|
-
|
176
|
-
let(:error_writer) { StringIO.new }
|
177
|
-
|
178
152
|
before do
|
179
|
-
CSV2Avro::Converter.new(reader,
|
153
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
|
180
154
|
end
|
181
155
|
|
182
156
|
it 'should not have any bad rows' do
|
183
|
-
expect(bad_rows_writer.read).to
|
157
|
+
expect(bad_rows_writer.read).to be_empty
|
184
158
|
end
|
185
159
|
|
186
160
|
it 'should not have any errors' do
|
187
|
-
expect(error_writer.read).to
|
161
|
+
expect(error_writer.read).to be_empty
|
188
162
|
end
|
189
163
|
|
190
164
|
it 'should store the data with the given schema' do
|
@@ -198,8 +172,8 @@ RSpec.describe CSV2Avro::Converter do
|
|
198
172
|
end
|
199
173
|
end
|
200
174
|
|
201
|
-
context '
|
202
|
-
let(:
|
175
|
+
context 'schema with default vaules' do
|
176
|
+
let(:schema_reader) do
|
203
177
|
StringIO.new(
|
204
178
|
{
|
205
179
|
name: 'product',
|
@@ -216,7 +190,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
216
190
|
|
217
191
|
let(:reader) do
|
218
192
|
StringIO.new(
|
219
|
-
|
193
|
+
CSV.generate do |csv|
|
220
194
|
csv << %w[id category enabled]
|
221
195
|
csv << %w[1 dresses true]
|
222
196
|
csv << %w[2 ]
|
@@ -224,24 +198,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
224
198
|
)
|
225
199
|
end
|
226
200
|
|
227
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
228
|
-
|
229
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
230
|
-
|
231
|
-
let(:bad_rows_writer) { StringIO.new }
|
232
|
-
|
233
|
-
let(:error_writer) { StringIO.new }
|
234
|
-
|
235
201
|
before do
|
236
|
-
CSV2Avro::Converter.new(reader,
|
202
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
|
237
203
|
end
|
238
204
|
|
239
205
|
it 'should not have any bad rows' do
|
240
|
-
expect(bad_rows_writer.read).to
|
206
|
+
expect(bad_rows_writer.read).to be_empty
|
241
207
|
end
|
242
208
|
|
243
209
|
it 'should not have any errors' do
|
244
|
-
expect(error_writer.read).to
|
210
|
+
expect(error_writer.read).to be_empty
|
245
211
|
end
|
246
212
|
|
247
213
|
it 'should store the defaults data' do
|
@@ -257,7 +223,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
257
223
|
context 'schema with aliased fields' do
|
258
224
|
let(:reader) do
|
259
225
|
StringIO.new(
|
260
|
-
|
226
|
+
CSV.generate do |csv|
|
261
227
|
csv << %w[id color_id]
|
262
228
|
csv << %w[1 1_red]
|
263
229
|
csv << %w[2 2_blue]
|
@@ -265,7 +231,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
265
231
|
)
|
266
232
|
end
|
267
233
|
|
268
|
-
let(:
|
234
|
+
let(:schema_reader) do
|
269
235
|
StringIO.new(
|
270
236
|
{
|
271
237
|
name: 'product',
|
@@ -278,24 +244,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
278
244
|
)
|
279
245
|
end
|
280
246
|
|
281
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
282
|
-
|
283
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
284
|
-
|
285
|
-
let(:bad_rows_writer) { StringIO.new }
|
286
|
-
|
287
|
-
let(:error_writer) { StringIO.new }
|
288
|
-
|
289
247
|
before do
|
290
|
-
CSV2Avro::Converter.new(reader,
|
248
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
|
291
249
|
end
|
292
250
|
|
293
251
|
it 'should not have any bad rows' do
|
294
|
-
expect(bad_rows_writer.read).to
|
252
|
+
expect(bad_rows_writer.read).to be_empty
|
295
253
|
end
|
296
254
|
|
297
255
|
it 'should not have any errors' do
|
298
|
-
expect(error_writer.read).to
|
256
|
+
expect(error_writer.read).to be_empty
|
299
257
|
end
|
300
258
|
|
301
259
|
it 'should store the data with the given schema' do
|
@@ -309,7 +267,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
309
267
|
end
|
310
268
|
|
311
269
|
context 'schema with enum column' do
|
312
|
-
let(:
|
270
|
+
let(:schema_reader) do
|
313
271
|
StringIO.new(
|
314
272
|
{
|
315
273
|
name: 'product',
|
@@ -330,7 +288,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
330
288
|
|
331
289
|
let(:reader) do
|
332
290
|
StringIO.new(
|
333
|
-
|
291
|
+
CSV.generate do |csv|
|
334
292
|
csv << %w[id size_type]
|
335
293
|
csv << %w[1 regular]
|
336
294
|
csv << %W[2 big\sand\stall]
|
@@ -339,24 +297,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
339
297
|
)
|
340
298
|
end
|
341
299
|
|
342
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
343
|
-
|
344
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
345
|
-
|
346
|
-
let(:bad_rows_writer) { StringIO.new }
|
347
|
-
|
348
|
-
let(:error_writer) { StringIO.new }
|
349
|
-
|
350
300
|
before do
|
351
|
-
CSV2Avro::Converter.new(reader,
|
301
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
|
352
302
|
end
|
353
303
|
|
354
304
|
it 'should not have any bad rows' do
|
355
|
-
expect(bad_rows_writer.read).to
|
305
|
+
expect(bad_rows_writer.read).to be_empty
|
356
306
|
end
|
357
307
|
|
358
308
|
it 'should not have any errors' do
|
359
|
-
expect(error_writer.read).to
|
309
|
+
expect(error_writer.read).to be_empty
|
360
310
|
end
|
361
311
|
|
362
312
|
it 'should store the data with the given schema' do
|
@@ -371,7 +321,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
371
321
|
end
|
372
322
|
|
373
323
|
context 'data with bad rows' do
|
374
|
-
let(:
|
324
|
+
let(:schema_reader) do
|
375
325
|
StringIO.new(
|
376
326
|
{
|
377
327
|
name: 'categories',
|
@@ -387,7 +337,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
387
337
|
|
388
338
|
let(:reader) do
|
389
339
|
StringIO.new(
|
390
|
-
|
340
|
+
CSV.generate({col_sep: "\t"}) do |csv|
|
391
341
|
csv << %w[id title description]
|
392
342
|
csv << ['1', nil, 'dresses']
|
393
343
|
csv << %w[2 female-tops]
|
@@ -397,16 +347,8 @@ RSpec.describe CSV2Avro::Converter do
|
|
397
347
|
)
|
398
348
|
end
|
399
349
|
|
400
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
401
|
-
|
402
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
403
|
-
|
404
|
-
let(:bad_rows_writer) { StringIO.new }
|
405
|
-
|
406
|
-
let(:error_writer) { StringIO.new }
|
407
|
-
|
408
350
|
before do
|
409
|
-
CSV2Avro::Converter.new(reader,
|
351
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
|
410
352
|
end
|
411
353
|
|
412
354
|
it 'should have the bad data in the original form' do
|
data/spec/csv2avro_spec.rb
CHANGED
@@ -2,20 +2,15 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
RSpec.describe CSV2Avro do
|
4
4
|
describe '#convert' do
|
5
|
-
let(:options)
|
6
|
-
{
|
7
|
-
schema: './spec/support/schema.avsc'
|
8
|
-
}
|
9
|
-
end
|
5
|
+
let(:options) { { schema: './spec/support/schema.avsc' } }
|
10
6
|
|
11
|
-
|
7
|
+
before do
|
12
8
|
ARGV.replace ['./spec/support/data.csv']
|
13
|
-
|
14
|
-
CSV2Avro.new(options)
|
15
9
|
end
|
10
|
+
subject(:converter) { CSV2Avro.new(options) }
|
16
11
|
|
17
|
-
it 'should write
|
18
|
-
expect { converter.convert }.to output("line 4: Missing value at name\n").to_stderr
|
12
|
+
it 'should write errors to STDERR' do
|
13
|
+
expect { converter.convert }.to output("line 4: Missing value at name\nline 5: Unable to parse\n").to_stderr
|
19
14
|
end
|
20
15
|
|
21
16
|
it 'should have a bad row' do
|
data/spec/support/data.csv
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv2avro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Ableda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-05 00:00:00.000000000 Z
|
11
|
+
date: 2015-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -102,11 +102,9 @@ executables:
|
|
102
102
|
extensions: []
|
103
103
|
extra_rdoc_files: []
|
104
104
|
files:
|
105
|
-
- ".dockerignore"
|
106
105
|
- ".gitignore"
|
107
106
|
- ".travis.yml"
|
108
107
|
- CHANGELOG.md
|
109
|
-
- Dockerfile
|
110
108
|
- Gemfile
|
111
109
|
- LICENSE.txt
|
112
110
|
- README.md
|
data/.dockerignore
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
.git
|
data/Dockerfile
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
FROM ruby:2.1
|
2
|
-
MAINTAINER Secret Sauce Partners, Inc. <dev@sspinc.io>
|
3
|
-
|
4
|
-
RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
|
5
|
-
python2.7 get-pip.py && \
|
6
|
-
pip install awscli
|
7
|
-
|
8
|
-
# throw errors if Gemfile has been modified since Gemfile.lock
|
9
|
-
RUN bundle config --global frozen 1
|
10
|
-
|
11
|
-
RUN mkdir -p /srv/csv2avro
|
12
|
-
WORKDIR /srv/csv2avro
|
13
|
-
|
14
|
-
RUN mkdir -p /srv/csv2avro/lib/csv2avro
|
15
|
-
|
16
|
-
COPY lib/csv2avro/version.rb /srv/csv2avro/lib/csv2avro/version.rb
|
17
|
-
COPY csv2avro.gemspec Gemfile Gemfile.lock /srv/csv2avro/
|
18
|
-
|
19
|
-
RUN bundle install
|
20
|
-
|
21
|
-
COPY . /srv/csv2avro
|
22
|
-
|
23
|
-
ENTRYPOINT ["./bin/csv2avro"]
|