csv2avro 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +4 -1
- data/bin/csv2avro +4 -0
- data/circle.yml +7 -0
- data/csv2avro.gemspec +1 -1
- data/lib/csv2avro/converter.rb +5 -1
- data/lib/csv2avro/version.rb +1 -1
- data/spec/csv2avro_spec.rb +31 -0
- data/spec/support/data_crlf.csv +4 -0
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9ca0f6dd6e1c7b69c3318b727e1b61b98adf9d6b
|
4
|
+
data.tar.gz: 50dc0ace77d0c56ac1cc048b8f4e778ecd76fd43
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad0c57ddf4f596c74e2f545aac86f7e663a11d6d5ac9ac85dc5246cb45465237c8d33068ec175e9f7dec3809c78bb52fc76b6a26935c722585f7023aed1a47a6
|
7
|
+
data.tar.gz: a9e8c252d834cfd0d48f4739f21243eba47823f870fefddb3e5092bc88df113ae4dc79d88865410c1df93fbd8f5d9d292f6d7f35417e83f16f5dd283b6340aff
|
data/CHANGELOG.md
CHANGED
@@ -3,6 +3,12 @@
|
|
3
3
|
All notable changes to this project are documented in this file.
|
4
4
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
5
5
|
|
6
|
+
## 1.3.0 (2016-02-16) [compare](https://github.com/sspinc/csv2avro/compare/1.2.0...1.3.0)
|
7
|
+
Support for custom line endings
|
8
|
+
|
9
|
+
### Added
|
10
|
+
* New line ending command line parameter
|
11
|
+
|
6
12
|
## 1.2.0 (2015-11-18) [compare](https://github.com/sspinc/csv2avro/compare/1.1.0...1.2.0)
|
7
13
|
Structured logging and metrics
|
8
14
|
|
data/README.md
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
Convert CSV files to Avro like a boss.
|
4
4
|
|
5
|
+

|
6
|
+
|
5
7
|
## Installation
|
6
8
|
|
7
9
|
$ gem install csv2avro
|
@@ -47,11 +49,12 @@ This will uncompress the file and convert it to avro, leaving the original file
|
|
47
49
|
For a full list of available options, run `csv2avro --help`
|
48
50
|
```
|
49
51
|
$ csv2avro --help
|
50
|
-
Version 1.0
|
52
|
+
Version 1.3.0 of CSV2Avro
|
51
53
|
Usage: csv2avro [options] [file]
|
52
54
|
-s, --schema SCHEMA A file containing the Avro schema. This value is required.
|
53
55
|
-b, --bad-rows [BAD_ROWS] The output location of the bad rows report file.
|
54
56
|
-d, --delimiter [DELIMITER] Field delimiter. If none specified, then comma is used as the delimiter.
|
57
|
+
-l, --line-ending [LINE_ENDING] Line ending character used as row separator in CSV parsing
|
55
58
|
-a [ARRAY_DELIMITER], Array field delimiter. If none specified, then comma is used as the delimiter.
|
56
59
|
--array-delimiter
|
57
60
|
-D, --write-defaults Write default values.
|
data/bin/csv2avro
CHANGED
@@ -22,6 +22,10 @@ option_parser = OptionParser.new do |opts|
|
|
22
22
|
options[:delimiter] = char.gsub("\\t", "\t")
|
23
23
|
end
|
24
24
|
|
25
|
+
opts.on('-l', '--line-ending [LINE_ENDING]', 'Line ending character used as row separator in CSV parsing') do |char|
|
26
|
+
options[:line_ending] = char.gsub("\\r", "\r").gsub("\\n", "\n")
|
27
|
+
end
|
28
|
+
|
25
29
|
opts.on('-a', '--array-delimiter [ARRAY_DELIMITER]', 'Array field delimiter. If none specified, then comma is used as the delimiter.') do |char|
|
26
30
|
options[:array_delimiter] = char
|
27
31
|
end
|
data/circle.yml
ADDED
data/csv2avro.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_development_dependency "bundler", "~> 1.
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.10"
|
22
22
|
spec.add_development_dependency "rake", "~> 10.0"
|
23
23
|
spec.add_development_dependency "rspec", "~> 3.2"
|
24
24
|
spec.add_development_dependency "pry", "~> 0.10"
|
data/lib/csv2avro/converter.rb
CHANGED
@@ -67,9 +67,14 @@ class CSV2Avro
|
|
67
67
|
@options[:delimiter] || ','
|
68
68
|
end
|
69
69
|
|
70
|
+
def row_sep
|
71
|
+
@options[:line_ending] || :auto
|
72
|
+
end
|
73
|
+
|
70
74
|
def csv_options
|
71
75
|
{
|
72
76
|
col_sep: col_sep,
|
77
|
+
row_sep: row_sep,
|
73
78
|
headers: @header,
|
74
79
|
header_converters: :aliases,
|
75
80
|
skip_blanks: true,
|
@@ -82,7 +87,6 @@ class CSV2Avro
|
|
82
87
|
CSV::HeaderConverters[:aliases] = lambda do |header|
|
83
88
|
@schema.aliases[header] || header
|
84
89
|
end
|
85
|
-
|
86
90
|
@csv ||= CSV.new(@reader, csv_options)
|
87
91
|
end
|
88
92
|
|
data/lib/csv2avro/version.rb
CHANGED
data/spec/csv2avro_spec.rb
CHANGED
@@ -59,5 +59,36 @@ RSpec.describe CSV2Avro do
|
|
59
59
|
end
|
60
60
|
end
|
61
61
|
end
|
62
|
+
|
63
|
+
context "CRLF line endings" do
|
64
|
+
let(:options) { { schema: './spec/support/schema.avsc',
|
65
|
+
line_ending: "\r\n" } }
|
66
|
+
subject(:converter) do
|
67
|
+
CSV2Avro.new(options)
|
68
|
+
end
|
69
|
+
|
70
|
+
before do
|
71
|
+
ARGV.replace ['./spec/support/data_crlf.csv']
|
72
|
+
converter.convert
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'should have a bad row' do
|
76
|
+
File.open('./spec/support/data_crlf.bad', 'r') do |file|
|
77
|
+
expect(file.read).to eq("L4: Missing value at name\n")
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'should contain the avro data' do
|
82
|
+
File.open('./spec/support/data_crlf.avro', 'r') do |file|
|
83
|
+
expect(AvroReader.new(file).read).to eq(
|
84
|
+
[
|
85
|
+
{ 'id'=>1, 'name'=>'dresses', 'description'=>'Dresses' },
|
86
|
+
{ 'id'=>2, 'name'=>'female-tops', 'description'=>nil },
|
87
|
+
]
|
88
|
+
)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
62
93
|
end
|
63
94
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv2avro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Ableda
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2016-02-16 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -17,14 +17,14 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: '1.
|
20
|
+
version: '1.10'
|
21
21
|
type: :development
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: '1.
|
27
|
+
version: '1.10'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: rake
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -126,6 +126,7 @@ files:
|
|
126
126
|
- README.md
|
127
127
|
- Rakefile
|
128
128
|
- bin/csv2avro
|
129
|
+
- circle.yml
|
129
130
|
- csv2avro.gemspec
|
130
131
|
- lib/csv2avro.rb
|
131
132
|
- lib/csv2avro/avro_writer.rb
|
@@ -140,6 +141,7 @@ files:
|
|
140
141
|
- spec/spec_helper.rb
|
141
142
|
- spec/support/avro_reader.rb
|
142
143
|
- spec/support/data.csv
|
144
|
+
- spec/support/data_crlf.csv
|
143
145
|
- spec/support/data_quoted.csv
|
144
146
|
- spec/support/schema.avsc
|
145
147
|
homepage: ''
|
@@ -173,6 +175,7 @@ test_files:
|
|
173
175
|
- spec/spec_helper.rb
|
174
176
|
- spec/support/avro_reader.rb
|
175
177
|
- spec/support/data.csv
|
178
|
+
- spec/support/data_crlf.csv
|
176
179
|
- spec/support/data_quoted.csv
|
177
180
|
- spec/support/schema.avsc
|
178
181
|
has_rdoc:
|