csv2avro 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +5 -0
- data/README.md +1 -13
- data/Rakefile +0 -23
- data/csv2avro.gemspec +1 -1
- data/lib/avro_schema.rb +1 -1
- data/lib/csv2avro/avro_writer.rb +2 -2
- data/lib/csv2avro/converter.rb +80 -82
- data/lib/csv2avro/version.rb +1 -1
- data/lib/csv2avro.rb +2 -7
- data/spec/csv2avro/converter_spec.rb +45 -103
- data/spec/csv2avro_spec.rb +5 -10
- data/spec/support/data.csv +3 -2
- metadata +2 -4
- data/.dockerignore +0 -1
- data/Dockerfile +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4102328d73046f50036d1a35848142d0d23e50dc
|
4
|
+
data.tar.gz: d7f59e943fd02106579e9f25b6bc5480e5243b03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e1e6e26a916c6beb65c25395e062fa8f45037171fcfc624798fb68417f257ac761fdb6ffd560dda251bee89953be59399f3e66ee0ce612e405cf352862cd4d30
|
7
|
+
data.tar.gz: e94c31792b3113edb64dfc1e72cb24868af3a446469df2ada734d6ef0210992c5739ad26ba485832fcfc37204193d1bbde59155df98a18c34b644cf65aac50f6
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -3,6 +3,11 @@
|
|
3
3
|
All notable changes to this project are documented in this file.
|
4
4
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
5
5
|
|
6
|
+
## 1.0.1 (2015-06-12; [compare](https://github.com/sspinc/csv2avro/compare/1.0.0...1.0.1))
|
7
|
+
|
8
|
+
### Fixed
|
9
|
+
* CSV parsing issues
|
10
|
+
|
6
11
|
## 1.0.0 (2015-06-05; [compare](https://github.com/sspinc/csv2avro/compare/0.4.0...1.0.0))
|
7
12
|
|
8
13
|
### Added
|
data/README.md
CHANGED
@@ -8,12 +8,6 @@ Convert CSV files to Avro like a boss.
|
|
8
8
|
|
9
9
|
or if you prefer to live on the edge, just clone this repository and build it from scratch.
|
10
10
|
|
11
|
-
You can run the converter within a **Docker** container, you just need to pull the `sspinc/csv2avro` image.
|
12
|
-
|
13
|
-
```
|
14
|
-
$ docker pull sspinc/csv2avro
|
15
|
-
```
|
16
|
-
|
17
11
|
## Usage
|
18
12
|
|
19
13
|
### Basic
|
@@ -24,12 +18,6 @@ This will process the data.csv file and creates a *data.avro* file and a *data.b
|
|
24
18
|
|
25
19
|
You can override the bad-rows file location with the `--bad-rows [BAD_ROWS]` option.
|
26
20
|
|
27
|
-
### CSV2Avro in Docker
|
28
|
-
|
29
|
-
```
|
30
|
-
$ docker run sspinc/csv2avro --help
|
31
|
-
```
|
32
|
-
|
33
21
|
### Streaming
|
34
22
|
```
|
35
23
|
$ cat ./spec/support/data.csv | csv2avro --schema ./spec/support/schema.avsc --bad-rows ./spec/support/data.bad.csv > ./spec/support/data.avro
|
@@ -59,7 +47,7 @@ This will uncompress the file and converts it to avro, leaving the original file
|
|
59
47
|
For a full list of available options, run `csv2avro --help`
|
60
48
|
```
|
61
49
|
$ csv2avro --help
|
62
|
-
Version 1.0.0 of CSV2Avro
|
50
|
+
Version 1.0.1 of CSV2Avro
|
63
51
|
Usage: csv2avro [options] [file]
|
64
52
|
-s, --schema SCHEMA A file containing the Avro schema. This value is required.
|
65
53
|
-b, --bad-rows [BAD_ROWS] The output location of the bad rows file.
|
data/Rakefile
CHANGED
@@ -16,26 +16,3 @@ RSpec::Core::RakeTask.new(:spec) do |task|
|
|
16
16
|
end
|
17
17
|
|
18
18
|
task :default => :spec
|
19
|
-
|
20
|
-
namespace :docker do
|
21
|
-
desc "Build docker image"
|
22
|
-
task :build do
|
23
|
-
sh "docker build -t sspinc/csv2avro:#{CSV2Avro::VERSION} ."
|
24
|
-
minor_version = CSV2Avro::VERSION.sub(/\.[0-9]+$/, '')
|
25
|
-
sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:#{minor_version}"
|
26
|
-
major_version = minor_version.sub(/\.[0-9]+$/, '')
|
27
|
-
sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:#{major_version}"
|
28
|
-
|
29
|
-
sh "docker tag -f sspinc/csv2avro:#{CSV2Avro::VERSION} sspinc/csv2avro:latest"
|
30
|
-
end
|
31
|
-
|
32
|
-
desc "Run specs inside docker image"
|
33
|
-
task :spec => :build do
|
34
|
-
sh "docker run -t --entrypoint=rake sspinc/csv2avro:#{CSV2Avro::VERSION} spec"
|
35
|
-
end
|
36
|
-
|
37
|
-
desc "Push docker image"
|
38
|
-
task :push => :spec do
|
39
|
-
sh "docker push sspinc/csv2avro"
|
40
|
-
end
|
41
|
-
end
|
data/csv2avro.gemspec
CHANGED
data/lib/avro_schema.rb
CHANGED
@@ -47,7 +47,7 @@ module Avro
|
|
47
47
|
if datum.nil? && expected_type != :null
|
48
48
|
@errors << "Missing value at #{name}"
|
49
49
|
else
|
50
|
-
@errors << "'#{datum}' at #{name}
|
50
|
+
@errors << "'#{datum}' at #{name} doesn't match the type '#{expected_schema.to_s}'"
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
data/lib/csv2avro/avro_writer.rb
CHANGED
data/lib/csv2avro/converter.rb
CHANGED
@@ -4,122 +4,120 @@ require 'csv'
|
|
4
4
|
|
5
5
|
class CSV2Avro
|
6
6
|
class Converter
|
7
|
-
attr_reader :writer, :bad_rows_writer, :error_writer, :schema, :reader, :csv_options, :converter_options, :header_row, :column_separator
|
8
|
-
|
9
7
|
def initialize(reader, writer, bad_rows_writer, error_writer, options, schema: schema)
|
8
|
+
@reader = reader
|
10
9
|
@writer = writer
|
11
10
|
@bad_rows_writer = bad_rows_writer
|
12
11
|
@error_writer = error_writer
|
12
|
+
@options = options
|
13
13
|
@schema = schema
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
@reader = reader
|
18
|
-
@header_row = reader.readline.strip
|
19
|
-
header = header_row.split(column_separator)
|
20
|
-
|
21
|
-
init_header_converter
|
22
|
-
@csv_options = {
|
23
|
-
headers: header,
|
24
|
-
skip_blanks: true,
|
25
|
-
col_sep: column_separator,
|
26
|
-
header_converters: :aliases
|
27
|
-
}
|
28
|
-
|
29
|
-
@converter_options = options
|
15
|
+
# read header row explicitly
|
16
|
+
@header = @reader.readline.strip.split(col_sep)
|
30
17
|
end
|
31
18
|
|
32
19
|
def convert
|
33
|
-
|
20
|
+
csv.each do |row|
|
21
|
+
hash = row.to_hash
|
34
22
|
|
35
|
-
|
23
|
+
add_defaults_to_hash!(hash) if @options[:write_defaults]
|
24
|
+
convert_fields!(hash)
|
36
25
|
|
37
|
-
|
38
|
-
|
39
|
-
|
26
|
+
begin
|
27
|
+
@writer.write(hash)
|
28
|
+
rescue Avro::IO::AvroTypeError
|
29
|
+
bad_rows_csv << row
|
40
30
|
|
41
|
-
|
42
|
-
|
31
|
+
until Avro::Schema.errors.empty? do
|
32
|
+
@error_writer.puts("line #{line_number}: #{Avro::Schema.errors.shift}")
|
43
33
|
end
|
34
|
+
end
|
35
|
+
end
|
44
36
|
|
45
|
-
|
37
|
+
@writer.flush
|
38
|
+
rescue CSV::MalformedCSVError
|
39
|
+
@error_writer.puts("line #{line_number}: Unable to parse")
|
40
|
+
end
|
46
41
|
|
47
|
-
|
48
|
-
writer.write(row)
|
49
|
-
writer.flush
|
50
|
-
rescue
|
51
|
-
if bad_rows_writer.size == 0
|
52
|
-
bad_rows_writer << header_row + "\n"
|
53
|
-
end
|
42
|
+
private
|
54
43
|
|
55
|
-
|
56
|
-
|
44
|
+
def array_delimiter
|
45
|
+
@options[:array_delimiter] || ','
|
46
|
+
end
|
57
47
|
|
58
|
-
|
59
|
-
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
48
|
+
def col_sep
|
49
|
+
@options[:delimiter] || ','
|
64
50
|
end
|
65
51
|
|
66
|
-
|
52
|
+
def csv_options
|
53
|
+
{
|
54
|
+
col_sep: col_sep,
|
55
|
+
headers: @header,
|
56
|
+
header_converters: :aliases,
|
57
|
+
skip_blanks: true,
|
58
|
+
write_headers: true
|
59
|
+
}
|
60
|
+
end
|
67
61
|
|
68
|
-
def
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
when :int
|
73
|
-
Integer(row[key])
|
74
|
-
when :float, :double
|
75
|
-
Float(row[key])
|
76
|
-
when :boolean
|
77
|
-
parse_boolean(row[key])
|
78
|
-
when :array
|
79
|
-
parse_array(row[key])
|
80
|
-
when :enum
|
81
|
-
row[key].downcase.tr(" ", "_")
|
82
|
-
end
|
83
|
-
rescue
|
84
|
-
row[key]
|
85
|
-
end
|
62
|
+
def csv
|
63
|
+
# Initialize header converter
|
64
|
+
CSV::HeaderConverters[:aliases] = lambda do |header|
|
65
|
+
@schema.aliases[header] || header
|
86
66
|
end
|
87
67
|
|
88
|
-
|
68
|
+
@csv ||= CSV.new(@reader, csv_options)
|
89
69
|
end
|
90
70
|
|
91
|
-
def
|
92
|
-
|
93
|
-
|
94
|
-
nil
|
71
|
+
def bad_rows_csv
|
72
|
+
options = csv_options.tap { |hash| hash.delete(:header_converters) }
|
73
|
+
@bad_rows_csv ||= CSV.new(@bad_rows_writer, options)
|
95
74
|
end
|
96
75
|
|
97
|
-
def
|
98
|
-
|
99
|
-
|
100
|
-
value.split(delimiter) if value
|
76
|
+
def line_number
|
77
|
+
@reader.lineno + 1
|
101
78
|
end
|
102
79
|
|
103
|
-
def
|
104
|
-
# Add default values to
|
105
|
-
|
106
|
-
|
80
|
+
def add_defaults_to_hash!(hash)
|
81
|
+
# Add default values to empty/missing fields
|
82
|
+
@schema.defaults.each do |key, value|
|
83
|
+
hash[key] = @schema.defaults[key] if hash[key].nil? or !hash.has_key?(key)
|
107
84
|
end
|
85
|
+
end
|
108
86
|
|
109
|
-
|
110
|
-
|
111
|
-
|
87
|
+
def convert_fields!(hash)
|
88
|
+
@schema.types.each do |key, value|
|
89
|
+
hash[key] = begin
|
90
|
+
case value
|
91
|
+
when :int
|
92
|
+
Integer(hash[key])
|
93
|
+
when :float, :double
|
94
|
+
Float(hash[key])
|
95
|
+
when :boolean
|
96
|
+
parse_boolean(hash[key])
|
97
|
+
when :array
|
98
|
+
parse_array(hash[key])
|
99
|
+
when :enum
|
100
|
+
hash[key].downcase.tr(" ", "_")
|
101
|
+
else
|
102
|
+
hash[key]
|
103
|
+
end
|
104
|
+
rescue
|
105
|
+
hash[key]
|
106
|
+
end
|
112
107
|
end
|
113
|
-
|
114
|
-
row
|
115
108
|
end
|
116
109
|
|
117
|
-
def
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
110
|
+
def parse_boolean(value)
|
111
|
+
case
|
112
|
+
when value == true || value =~ (/^(true|t|yes|y|1)$/i) then true
|
113
|
+
when value == false || value =~ (/^(false|f|no|n|0)$/i) then false
|
114
|
+
else
|
115
|
+
nil
|
122
116
|
end
|
123
117
|
end
|
118
|
+
|
119
|
+
def parse_array(value)
|
120
|
+
value.split(array_delimiter) if value
|
121
|
+
end
|
124
122
|
end
|
125
123
|
end
|
data/lib/csv2avro/version.rb
CHANGED
data/lib/csv2avro.rb
CHANGED
@@ -17,12 +17,7 @@ class CSV2Avro
|
|
17
17
|
Converter.new(reader, writer, bad_rows_writer, error_writer, options, schema: schema).convert
|
18
18
|
ensure
|
19
19
|
writer.close if writer
|
20
|
-
|
21
|
-
if bad_rows_writer.size == 0
|
22
|
-
File.delete(bad_rows_uri)
|
23
|
-
elsif bad_rows_writer
|
24
|
-
bad_rows_writer.close
|
25
|
-
end
|
20
|
+
bad_rows_writer.close
|
26
21
|
end
|
27
22
|
|
28
23
|
private
|
@@ -34,7 +29,7 @@ class CSV2Avro
|
|
34
29
|
end
|
35
30
|
|
36
31
|
def reader
|
37
|
-
ARGF.lineno =
|
32
|
+
ARGF.lineno = -1
|
38
33
|
ARGF
|
39
34
|
end
|
40
35
|
|
@@ -1,9 +1,15 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe CSV2Avro::Converter do
|
4
|
-
describe '#
|
4
|
+
describe '#convert' do
|
5
|
+
let(:schema) { CSV2Avro::Schema.new(schema_reader) }
|
6
|
+
let(:writer) { StringIO.new }
|
7
|
+
let(:avro_writer) { CSV2Avro::AvroWriter.new(writer, schema) }
|
8
|
+
let(:bad_rows_writer) { StringIO.new }
|
9
|
+
let(:error_writer) { StringIO.new }
|
10
|
+
|
5
11
|
context 'schema with string and integer columns' do
|
6
|
-
let(:
|
12
|
+
let(:schema_reader) do
|
7
13
|
StringIO.new(
|
8
14
|
{
|
9
15
|
name: 'categories',
|
@@ -20,7 +26,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
20
26
|
context 'separated with commas (csv)' do
|
21
27
|
let(:reader) do
|
22
28
|
StringIO.new(
|
23
|
-
|
29
|
+
CSV.generate do |csv|
|
24
30
|
csv << %w[id name description]
|
25
31
|
csv << %w[1 dresses Dresses]
|
26
32
|
csv << %w[2 female-tops]
|
@@ -28,28 +34,20 @@ RSpec.describe CSV2Avro::Converter do
|
|
28
34
|
)
|
29
35
|
end
|
30
36
|
|
31
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
32
|
-
|
33
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
34
|
-
|
35
|
-
let(:bad_rows_writer) { StringIO.new }
|
36
|
-
|
37
|
-
let(:error_writer) { StringIO.new }
|
38
|
-
|
39
37
|
before do
|
40
|
-
CSV2Avro::Converter.new(reader,
|
38
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
|
41
39
|
end
|
42
40
|
|
43
41
|
it 'should not have any bad rows' do
|
44
|
-
expect(bad_rows_writer.read).to
|
42
|
+
expect(bad_rows_writer.read).to be_empty
|
45
43
|
end
|
46
44
|
|
47
45
|
it 'should not have any errors' do
|
48
|
-
expect(error_writer.read).to
|
46
|
+
expect(error_writer.read).to be_empty
|
49
47
|
end
|
50
48
|
|
51
49
|
it 'should store the data with the given schema' do
|
52
|
-
expect(AvroReader.new(
|
50
|
+
expect(AvroReader.new(avro_writer).read).to eq(
|
53
51
|
[
|
54
52
|
{ 'id'=>1, 'name'=>'dresses', 'description'=>'Dresses' },
|
55
53
|
{ 'id'=>2, 'name'=>'female-tops', 'description'=>nil }
|
@@ -61,7 +59,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
61
59
|
context 'separated with tabs (tsv)' do
|
62
60
|
let(:reader) do
|
63
61
|
StringIO.new(
|
64
|
-
|
62
|
+
CSV.generate({col_sep: "\t"}) do |csv|
|
65
63
|
csv << %w[id name description]
|
66
64
|
csv << %w[1 dresses Dresses]
|
67
65
|
csv << %w[2 female-tops]
|
@@ -69,24 +67,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
69
67
|
)
|
70
68
|
end
|
71
69
|
|
72
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
73
|
-
|
74
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
75
|
-
|
76
|
-
let(:bad_rows_writer) { StringIO.new }
|
77
|
-
|
78
|
-
let(:error_writer) { StringIO.new }
|
79
|
-
|
80
70
|
before do
|
81
|
-
CSV2Avro::Converter.new(reader,
|
71
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
|
82
72
|
end
|
83
73
|
|
84
74
|
it 'should not have any bad rows' do
|
85
|
-
expect(bad_rows_writer.read).to
|
75
|
+
expect(bad_rows_writer.read).to be_empty
|
86
76
|
end
|
87
77
|
|
88
78
|
it 'should not have any errors' do
|
89
|
-
expect(error_writer.read).to
|
79
|
+
expect(error_writer.read).to be_empty
|
90
80
|
end
|
91
81
|
|
92
82
|
it 'should store the data with the given schema' do
|
@@ -101,7 +91,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
101
91
|
end
|
102
92
|
|
103
93
|
context 'schema with boolean and array columns' do
|
104
|
-
let(:
|
94
|
+
let(:schema_reader) do
|
105
95
|
StringIO.new(
|
106
96
|
{
|
107
97
|
name: 'categories',
|
@@ -118,7 +108,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
118
108
|
context 'separated with commas (default)' do
|
119
109
|
let(:reader) do
|
120
110
|
StringIO.new(
|
121
|
-
|
111
|
+
CSV.generate do |csv|
|
122
112
|
csv << %w[id enabled image_links]
|
123
113
|
csv << %w[1 true http://www.images.com/dresses.jpeg]
|
124
114
|
csv << %w[2 false http://www.images.com/bras1.jpeg,http://www.images.com/bras2.jpeg]
|
@@ -126,24 +116,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
126
116
|
)
|
127
117
|
end
|
128
118
|
|
129
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
130
|
-
|
131
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
132
|
-
|
133
|
-
let(:bad_rows_writer) { StringIO.new }
|
134
|
-
|
135
|
-
let(:error_writer) { StringIO.new }
|
136
|
-
|
137
119
|
before do
|
138
|
-
CSV2Avro::Converter.new(reader,
|
120
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
|
139
121
|
end
|
140
122
|
|
141
123
|
it 'should not have any bad rows' do
|
142
|
-
expect(bad_rows_writer.read).to
|
124
|
+
expect(bad_rows_writer.read).to be_empty
|
143
125
|
end
|
144
126
|
|
145
127
|
it 'should not have any errors' do
|
146
|
-
expect(error_writer.read).to
|
128
|
+
expect(error_writer.read).to be_empty
|
147
129
|
end
|
148
130
|
|
149
131
|
it 'should store the data with the given schema' do
|
@@ -159,7 +141,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
159
141
|
context 'separated with semicolons' do
|
160
142
|
let(:reader) do
|
161
143
|
StringIO.new(
|
162
|
-
|
144
|
+
CSV.generate({col_sep: "\t"}) do |csv|
|
163
145
|
csv << %w[id enabled image_links]
|
164
146
|
csv << %w[1 true http://www.images.com/dresses.jpeg]
|
165
147
|
csv << %w[2 false http://www.images.com/bras1.jpeg;http://www.images.com/bras2.jpeg]
|
@@ -167,24 +149,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
167
149
|
)
|
168
150
|
end
|
169
151
|
|
170
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
171
|
-
|
172
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
173
|
-
|
174
|
-
let(:bad_rows_writer) { StringIO.new }
|
175
|
-
|
176
|
-
let(:error_writer) { StringIO.new }
|
177
|
-
|
178
152
|
before do
|
179
|
-
CSV2Avro::Converter.new(reader,
|
153
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
|
180
154
|
end
|
181
155
|
|
182
156
|
it 'should not have any bad rows' do
|
183
|
-
expect(bad_rows_writer.read).to
|
157
|
+
expect(bad_rows_writer.read).to be_empty
|
184
158
|
end
|
185
159
|
|
186
160
|
it 'should not have any errors' do
|
187
|
-
expect(error_writer.read).to
|
161
|
+
expect(error_writer.read).to be_empty
|
188
162
|
end
|
189
163
|
|
190
164
|
it 'should store the data with the given schema' do
|
@@ -198,8 +172,8 @@ RSpec.describe CSV2Avro::Converter do
|
|
198
172
|
end
|
199
173
|
end
|
200
174
|
|
201
|
-
context '
|
202
|
-
let(:
|
175
|
+
context 'schema with default vaules' do
|
176
|
+
let(:schema_reader) do
|
203
177
|
StringIO.new(
|
204
178
|
{
|
205
179
|
name: 'product',
|
@@ -216,7 +190,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
216
190
|
|
217
191
|
let(:reader) do
|
218
192
|
StringIO.new(
|
219
|
-
|
193
|
+
CSV.generate do |csv|
|
220
194
|
csv << %w[id category enabled]
|
221
195
|
csv << %w[1 dresses true]
|
222
196
|
csv << %w[2 ]
|
@@ -224,24 +198,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
224
198
|
)
|
225
199
|
end
|
226
200
|
|
227
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
228
|
-
|
229
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
230
|
-
|
231
|
-
let(:bad_rows_writer) { StringIO.new }
|
232
|
-
|
233
|
-
let(:error_writer) { StringIO.new }
|
234
|
-
|
235
201
|
before do
|
236
|
-
CSV2Avro::Converter.new(reader,
|
202
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
|
237
203
|
end
|
238
204
|
|
239
205
|
it 'should not have any bad rows' do
|
240
|
-
expect(bad_rows_writer.read).to
|
206
|
+
expect(bad_rows_writer.read).to be_empty
|
241
207
|
end
|
242
208
|
|
243
209
|
it 'should not have any errors' do
|
244
|
-
expect(error_writer.read).to
|
210
|
+
expect(error_writer.read).to be_empty
|
245
211
|
end
|
246
212
|
|
247
213
|
it 'should store the defaults data' do
|
@@ -257,7 +223,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
257
223
|
context 'schema with aliased fields' do
|
258
224
|
let(:reader) do
|
259
225
|
StringIO.new(
|
260
|
-
|
226
|
+
CSV.generate do |csv|
|
261
227
|
csv << %w[id color_id]
|
262
228
|
csv << %w[1 1_red]
|
263
229
|
csv << %w[2 2_blue]
|
@@ -265,7 +231,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
265
231
|
)
|
266
232
|
end
|
267
233
|
|
268
|
-
let(:
|
234
|
+
let(:schema_reader) do
|
269
235
|
StringIO.new(
|
270
236
|
{
|
271
237
|
name: 'product',
|
@@ -278,24 +244,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
278
244
|
)
|
279
245
|
end
|
280
246
|
|
281
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
282
|
-
|
283
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
284
|
-
|
285
|
-
let(:bad_rows_writer) { StringIO.new }
|
286
|
-
|
287
|
-
let(:error_writer) { StringIO.new }
|
288
|
-
|
289
247
|
before do
|
290
|
-
CSV2Avro::Converter.new(reader,
|
248
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
|
291
249
|
end
|
292
250
|
|
293
251
|
it 'should not have any bad rows' do
|
294
|
-
expect(bad_rows_writer.read).to
|
252
|
+
expect(bad_rows_writer.read).to be_empty
|
295
253
|
end
|
296
254
|
|
297
255
|
it 'should not have any errors' do
|
298
|
-
expect(error_writer.read).to
|
256
|
+
expect(error_writer.read).to be_empty
|
299
257
|
end
|
300
258
|
|
301
259
|
it 'should store the data with the given schema' do
|
@@ -309,7 +267,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
309
267
|
end
|
310
268
|
|
311
269
|
context 'schema with enum column' do
|
312
|
-
let(:
|
270
|
+
let(:schema_reader) do
|
313
271
|
StringIO.new(
|
314
272
|
{
|
315
273
|
name: 'product',
|
@@ -330,7 +288,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
330
288
|
|
331
289
|
let(:reader) do
|
332
290
|
StringIO.new(
|
333
|
-
|
291
|
+
CSV.generate do |csv|
|
334
292
|
csv << %w[id size_type]
|
335
293
|
csv << %w[1 regular]
|
336
294
|
csv << %W[2 big\sand\stall]
|
@@ -339,24 +297,16 @@ RSpec.describe CSV2Avro::Converter do
|
|
339
297
|
)
|
340
298
|
end
|
341
299
|
|
342
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
343
|
-
|
344
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
345
|
-
|
346
|
-
let(:bad_rows_writer) { StringIO.new }
|
347
|
-
|
348
|
-
let(:error_writer) { StringIO.new }
|
349
|
-
|
350
300
|
before do
|
351
|
-
CSV2Avro::Converter.new(reader,
|
301
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
|
352
302
|
end
|
353
303
|
|
354
304
|
it 'should not have any bad rows' do
|
355
|
-
expect(bad_rows_writer.read).to
|
305
|
+
expect(bad_rows_writer.read).to be_empty
|
356
306
|
end
|
357
307
|
|
358
308
|
it 'should not have any errors' do
|
359
|
-
expect(error_writer.read).to
|
309
|
+
expect(error_writer.read).to be_empty
|
360
310
|
end
|
361
311
|
|
362
312
|
it 'should store the data with the given schema' do
|
@@ -371,7 +321,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
371
321
|
end
|
372
322
|
|
373
323
|
context 'data with bad rows' do
|
374
|
-
let(:
|
324
|
+
let(:schema_reader) do
|
375
325
|
StringIO.new(
|
376
326
|
{
|
377
327
|
name: 'categories',
|
@@ -387,7 +337,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
387
337
|
|
388
338
|
let(:reader) do
|
389
339
|
StringIO.new(
|
390
|
-
|
340
|
+
CSV.generate({col_sep: "\t"}) do |csv|
|
391
341
|
csv << %w[id title description]
|
392
342
|
csv << ['1', nil, 'dresses']
|
393
343
|
csv << %w[2 female-tops]
|
@@ -397,16 +347,8 @@ RSpec.describe CSV2Avro::Converter do
|
|
397
347
|
)
|
398
348
|
end
|
399
349
|
|
400
|
-
let(:schema) { CSV2Avro::Schema.new(schema_io) }
|
401
|
-
|
402
|
-
let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
|
403
|
-
|
404
|
-
let(:bad_rows_writer) { StringIO.new }
|
405
|
-
|
406
|
-
let(:error_writer) { StringIO.new }
|
407
|
-
|
408
350
|
before do
|
409
|
-
CSV2Avro::Converter.new(reader,
|
351
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
|
410
352
|
end
|
411
353
|
|
412
354
|
it 'should have the bad data in the original form' do
|
data/spec/csv2avro_spec.rb
CHANGED
@@ -2,20 +2,15 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
RSpec.describe CSV2Avro do
|
4
4
|
describe '#convert' do
|
5
|
-
let(:options)
|
6
|
-
{
|
7
|
-
schema: './spec/support/schema.avsc'
|
8
|
-
}
|
9
|
-
end
|
5
|
+
let(:options) { { schema: './spec/support/schema.avsc' } }
|
10
6
|
|
11
|
-
|
7
|
+
before do
|
12
8
|
ARGV.replace ['./spec/support/data.csv']
|
13
|
-
|
14
|
-
CSV2Avro.new(options)
|
15
9
|
end
|
10
|
+
subject(:converter) { CSV2Avro.new(options) }
|
16
11
|
|
17
|
-
it 'should write
|
18
|
-
expect { converter.convert }.to output("line 4: Missing value at name\n").to_stderr
|
12
|
+
it 'should write errors to STDERR' do
|
13
|
+
expect { converter.convert }.to output("line 4: Missing value at name\nline 5: Unable to parse\n").to_stderr
|
19
14
|
end
|
20
15
|
|
21
16
|
it 'should have a bad row' do
|
data/spec/support/data.csv
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv2avro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Ableda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -102,11 +102,9 @@ executables:
|
|
102
102
|
extensions: []
|
103
103
|
extra_rdoc_files: []
|
104
104
|
files:
|
105
|
-
- ".dockerignore"
|
106
105
|
- ".gitignore"
|
107
106
|
- ".travis.yml"
|
108
107
|
- CHANGELOG.md
|
109
|
-
- Dockerfile
|
110
108
|
- Gemfile
|
111
109
|
- LICENSE.txt
|
112
110
|
- README.md
|
data/.dockerignore
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
.git
|
data/Dockerfile
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
FROM ruby:2.1
|
2
|
-
MAINTAINER Secret Sauce Partners, Inc. <dev@sspinc.io>
|
3
|
-
|
4
|
-
RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
|
5
|
-
python2.7 get-pip.py && \
|
6
|
-
pip install awscli
|
7
|
-
|
8
|
-
# throw errors if Gemfile has been modified since Gemfile.lock
|
9
|
-
RUN bundle config --global frozen 1
|
10
|
-
|
11
|
-
RUN mkdir -p /srv/csv2avro
|
12
|
-
WORKDIR /srv/csv2avro
|
13
|
-
|
14
|
-
RUN mkdir -p /srv/csv2avro/lib/csv2avro
|
15
|
-
|
16
|
-
COPY lib/csv2avro/version.rb /srv/csv2avro/lib/csv2avro/version.rb
|
17
|
-
COPY csv2avro.gemspec Gemfile Gemfile.lock /srv/csv2avro/
|
18
|
-
|
19
|
-
RUN bundle install
|
20
|
-
|
21
|
-
COPY . /srv/csv2avro
|
22
|
-
|
23
|
-
ENTRYPOINT ["./bin/csv2avro"]
|