csv2avro 1.1.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/bin/csv2avro +2 -0
- data/csv2avro.gemspec +1 -0
- data/lib/csv2avro/converter.rb +19 -4
- data/lib/csv2avro/version.rb +1 -1
- data/lib/csv2avro.rb +24 -5
- data/spec/csv2avro/converter_spec.rb +8 -43
- data/spec/csv2avro_spec.rb +2 -8
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a5d99afbda08e7b21d3045731ad738ae3d5c129
|
4
|
+
data.tar.gz: 9acd1c36181e07032710143a1cf6e0d43d956153
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2bc2091b05c25a1fc4272d5b90386e24ca0e4dd51d0a0c0304a15c0c2db1cc799fcfd30b8100985023fb9d93e61ca1f28fef7911bbac0fc7c98cb57d0ea92f4
|
7
|
+
data.tar.gz: e53e649a5fba8a62e29d60bf84701b0084a360395af377985e5c18dad584d4610101eded0bb90cde16ac7ff6eeadf87fb932c14e939f40f2e98318d93555eee7
|
data/CHANGELOG.md
CHANGED
@@ -3,6 +3,16 @@
|
|
3
3
|
All notable changes to this project are documented in this file.
|
4
4
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
5
5
|
|
6
|
+
## 1.2.0 (2015-11-18) [compare](https://github.com/sspinc/csv2avro/compare/1.1.0...1.2.0)
|
7
|
+
Structured logging and metrics
|
8
|
+
|
9
|
+
### Changed
|
10
|
+
* Log in JSON format using Logr (https://github.com/sspinc/logr)
|
11
|
+
|
12
|
+
### Added
|
13
|
+
* New started_converting and finished_converting events
|
14
|
+
* New lines_processed metric
|
15
|
+
|
6
16
|
## 1.1.0 (2015-09-16) [compare](https://github.com/sspinc/csv2avro/compare/1.0.2...1.1.0)
|
7
17
|
|
8
18
|
### Changed
|
data/bin/csv2avro
CHANGED
@@ -48,10 +48,12 @@ begin
|
|
48
48
|
|
49
49
|
CSV2Avro.new(options).convert
|
50
50
|
rescue OptionParser::MissingArgument => ex
|
51
|
+
CSV2Avro.logger.fatal(ex.message)
|
51
52
|
$stderr.puts ex.message
|
52
53
|
$stderr.puts option_parser
|
53
54
|
exit 2
|
54
55
|
rescue Exception => e
|
56
|
+
CSV2Avro.logger.fatal("processing failed: #{e.message}")
|
55
57
|
$stderr.puts 'Uh oh, something went wrong!'
|
56
58
|
$stderr.puts e.message
|
57
59
|
$stderr.puts e.backtrace.join("\n")
|
data/csv2avro.gemspec
CHANGED
data/lib/csv2avro/converter.rb
CHANGED
@@ -1,14 +1,24 @@
|
|
1
1
|
require 'csv2avro/schema'
|
2
2
|
require 'csv2avro/avro_writer'
|
3
3
|
require 'csv'
|
4
|
+
require 'logr'
|
4
5
|
|
5
6
|
class CSV2Avro
|
6
7
|
class Converter
|
7
|
-
|
8
|
+
|
9
|
+
def self.logger
|
10
|
+
@logger ||= Logr::Logger.new('csv2avro.converter')
|
11
|
+
end
|
12
|
+
|
13
|
+
def logger
|
14
|
+
self.class.logger
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(reader, writer, bad_rows_writer, filename, options, schema: schema)
|
8
18
|
@reader = reader
|
9
19
|
@writer = writer
|
10
20
|
@bad_rows_writer = bad_rows_writer
|
11
|
-
@error_writer = error_writer
|
21
|
+
@filename = filename
|
12
22
|
@options = options
|
13
23
|
@schema = schema
|
14
24
|
|
@@ -22,7 +32,8 @@ class CSV2Avro
|
|
22
32
|
row = csv.shift
|
23
33
|
rescue CSV::MalformedCSVError
|
24
34
|
error_msg = "L#{row_number}: Unable to parse"
|
25
|
-
|
35
|
+
logger.event('parse_error', filename: @filename, line: row_number)
|
36
|
+
.error(error_msg)
|
26
37
|
@bad_rows_writer.puts(error_msg)
|
27
38
|
next
|
28
39
|
end
|
@@ -35,11 +46,15 @@ class CSV2Avro
|
|
35
46
|
@writer.write(hash)
|
36
47
|
rescue CSV2Avro::SchemaValidationError => e
|
37
48
|
error_msg = "L#{row_number}: #{e.errors.join(', ')}"
|
38
|
-
|
49
|
+
e.errors.each do |error|
|
50
|
+
logger.event('schema_violation', filename: @filename, line: row_number, cause: error)
|
51
|
+
.error(error_msg)
|
52
|
+
end
|
39
53
|
@bad_rows_writer.puts(error_msg)
|
40
54
|
end
|
41
55
|
end
|
42
56
|
@writer.flush
|
57
|
+
row_number
|
43
58
|
end
|
44
59
|
|
45
60
|
private
|
data/lib/csv2avro/version.rb
CHANGED
data/lib/csv2avro.rb
CHANGED
@@ -1,9 +1,19 @@
|
|
1
1
|
require 'csv2avro/converter'
|
2
2
|
require 'csv2avro/version'
|
3
3
|
|
4
|
+
require 'logr'
|
5
|
+
|
4
6
|
class CSV2Avro
|
5
7
|
attr_reader :input_path, :schema_path, :bad_rows_path, :stdout_option, :options
|
6
8
|
|
9
|
+
def self.logger
|
10
|
+
@logger ||= Logr::Logger.new('csv2avro')
|
11
|
+
end
|
12
|
+
|
13
|
+
def logger
|
14
|
+
self.class.logger
|
15
|
+
end
|
16
|
+
|
7
17
|
def initialize(options)
|
8
18
|
@input_path = ARGV.first
|
9
19
|
@schema_path = options.delete(:schema)
|
@@ -14,7 +24,16 @@ class CSV2Avro
|
|
14
24
|
end
|
15
25
|
|
16
26
|
def convert
|
17
|
-
|
27
|
+
logger.event('started_converting', filename: input_filename)
|
28
|
+
.monitored("Started converting #{input_filename}", "Started converting #{input_filename}")
|
29
|
+
.info("Started converting #{input_filename}")
|
30
|
+
|
31
|
+
lines = Converter.new(reader, writer, bad_rows_writer, input_filename, options, schema: schema).convert
|
32
|
+
|
33
|
+
logger.event('finished_converting', filename: input_filename)
|
34
|
+
.metric('lines_processed', lines)
|
35
|
+
.monitored("Finished converting #{input_filename}", "Finished converting #{input_filename}, processed #{lines} lines in total.")
|
36
|
+
.info("Finished converting #{input_filename}")
|
18
37
|
ensure
|
19
38
|
writer.close if writer
|
20
39
|
bad_rows_writer.close
|
@@ -45,6 +64,10 @@ class CSV2Avro
|
|
45
64
|
end
|
46
65
|
end
|
47
66
|
|
67
|
+
def input_filename
|
68
|
+
File.basename(input_path)
|
69
|
+
end
|
70
|
+
|
48
71
|
def avro_uri
|
49
72
|
dir = File.dirname(input_path)
|
50
73
|
ext = File.extname(input_path)
|
@@ -53,10 +76,6 @@ class CSV2Avro
|
|
53
76
|
"#{dir}/#{name}.avro"
|
54
77
|
end
|
55
78
|
|
56
|
-
def error_writer
|
57
|
-
$stderr
|
58
|
-
end
|
59
|
-
|
60
79
|
def bad_rows_writer
|
61
80
|
@__bad_rows_writer ||= File.open(bad_rows_uri, 'w')
|
62
81
|
end
|
@@ -6,7 +6,6 @@ RSpec.describe CSV2Avro::Converter do
|
|
6
6
|
let(:writer) { StringIO.new }
|
7
7
|
let(:avro_writer) { CSV2Avro::AvroWriter.new(writer, schema) }
|
8
8
|
let(:bad_rows_writer) { StringIO.new }
|
9
|
-
let(:error_writer) { StringIO.new }
|
10
9
|
|
11
10
|
context 'schema with string and integer columns' do
|
12
11
|
let(:schema_reader) do
|
@@ -35,17 +34,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
35
34
|
end
|
36
35
|
|
37
36
|
before do
|
38
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
37
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
|
39
38
|
end
|
40
39
|
|
41
40
|
it 'should not have any bad rows' do
|
42
41
|
expect(bad_rows_writer.read).to be_empty
|
43
42
|
end
|
44
43
|
|
45
|
-
it 'should not have any errors' do
|
46
|
-
expect(error_writer.read).to be_empty
|
47
|
-
end
|
48
|
-
|
49
44
|
it 'should store the data with the given schema' do
|
50
45
|
expect(AvroReader.new(avro_writer).read).to eq(
|
51
46
|
[
|
@@ -68,17 +63,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
68
63
|
end
|
69
64
|
|
70
65
|
before do
|
71
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
66
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t" }, schema: schema).convert
|
72
67
|
end
|
73
68
|
|
74
69
|
it 'should not have any bad rows' do
|
75
70
|
expect(bad_rows_writer.read).to be_empty
|
76
71
|
end
|
77
72
|
|
78
|
-
it 'should not have any errors' do
|
79
|
-
expect(error_writer.read).to be_empty
|
80
|
-
end
|
81
|
-
|
82
73
|
it 'should store the data with the given schema' do
|
83
74
|
expect(AvroReader.new(writer).read).to eq(
|
84
75
|
[
|
@@ -117,17 +108,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
117
108
|
end
|
118
109
|
|
119
110
|
before do
|
120
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
111
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
|
121
112
|
end
|
122
113
|
|
123
114
|
it 'should not have any bad rows' do
|
124
115
|
expect(bad_rows_writer.read).to be_empty
|
125
116
|
end
|
126
117
|
|
127
|
-
it 'should not have any errors' do
|
128
|
-
expect(error_writer.read).to be_empty
|
129
|
-
end
|
130
|
-
|
131
118
|
it 'should store the data with the given schema' do
|
132
119
|
expect(AvroReader.new(writer).read).to eq(
|
133
120
|
[
|
@@ -150,17 +137,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
150
137
|
end
|
151
138
|
|
152
139
|
before do
|
153
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
140
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
|
154
141
|
end
|
155
142
|
|
156
143
|
it 'should not have any bad rows' do
|
157
144
|
expect(bad_rows_writer.read).to be_empty
|
158
145
|
end
|
159
146
|
|
160
|
-
it 'should not have any errors' do
|
161
|
-
expect(error_writer.read).to be_empty
|
162
|
-
end
|
163
|
-
|
164
147
|
it 'should store the data with the given schema' do
|
165
148
|
expect(AvroReader.new(writer).read).to eq(
|
166
149
|
[
|
@@ -199,17 +182,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
199
182
|
end
|
200
183
|
|
201
184
|
before do
|
202
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
185
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { write_defaults: true }, schema: schema).convert
|
203
186
|
end
|
204
187
|
|
205
188
|
it 'should not have any bad rows' do
|
206
189
|
expect(bad_rows_writer.read).to be_empty
|
207
190
|
end
|
208
191
|
|
209
|
-
it 'should not have any errors' do
|
210
|
-
expect(error_writer.read).to be_empty
|
211
|
-
end
|
212
|
-
|
213
192
|
it 'should store the defaults data' do
|
214
193
|
expect(AvroReader.new(writer).read).to eq(
|
215
194
|
[
|
@@ -245,17 +224,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
245
224
|
end
|
246
225
|
|
247
226
|
before do
|
248
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
227
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
|
249
228
|
end
|
250
229
|
|
251
230
|
it 'should not have any bad rows' do
|
252
231
|
expect(bad_rows_writer.read).to be_empty
|
253
232
|
end
|
254
233
|
|
255
|
-
it 'should not have any errors' do
|
256
|
-
expect(error_writer.read).to be_empty
|
257
|
-
end
|
258
|
-
|
259
234
|
it 'should store the data with the given schema' do
|
260
235
|
expect(AvroReader.new(writer).read).to eq(
|
261
236
|
[
|
@@ -298,17 +273,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
298
273
|
end
|
299
274
|
|
300
275
|
before do
|
301
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
276
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { write_defaults: true }, schema: schema).convert
|
302
277
|
end
|
303
278
|
|
304
279
|
it 'should not have any bad rows' do
|
305
280
|
expect(bad_rows_writer.read).to be_empty
|
306
281
|
end
|
307
282
|
|
308
|
-
it 'should not have any errors' do
|
309
|
-
expect(error_writer.read).to be_empty
|
310
|
-
end
|
311
|
-
|
312
283
|
it 'should store the data with the given schema' do
|
313
284
|
expect(AvroReader.new(writer).read).to eq(
|
314
285
|
[
|
@@ -348,7 +319,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
348
319
|
end
|
349
320
|
|
350
321
|
before do
|
351
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
322
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t" }, schema: schema).convert
|
352
323
|
end
|
353
324
|
|
354
325
|
it 'should report the bad rows correctly' do
|
@@ -357,12 +328,6 @@ RSpec.describe CSV2Avro::Converter do
|
|
357
328
|
)
|
358
329
|
end
|
359
330
|
|
360
|
-
it 'should have an error' do
|
361
|
-
expect(error_writer.string).to eq(
|
362
|
-
"L2: Missing value at name\nL5: Missing value at name\n"
|
363
|
-
)
|
364
|
-
end
|
365
|
-
|
366
331
|
it 'should store the data with the given schema' do
|
367
332
|
expect(AvroReader.new(writer).read).to eq(
|
368
333
|
[
|
data/spec/csv2avro_spec.rb
CHANGED
@@ -10,13 +10,10 @@ RSpec.describe CSV2Avro do
|
|
10
10
|
context "Unquoted header" do
|
11
11
|
before do
|
12
12
|
ARGV.replace ['./spec/support/data.csv']
|
13
|
+
converter.convert
|
13
14
|
end
|
14
15
|
|
15
16
|
bad_rows_output = "L4: Missing value at name\nL7: Unable to parse\nL9: Missing value at id, Missing value at name\nL10: 'male-shoes' at id doesn't match the type '\"int\"', Missing value at name\n"
|
16
|
-
it 'should write errors to STDERR' do
|
17
|
-
expect { converter.convert }.to output(bad_rows_output).to_stderr
|
18
|
-
end
|
19
|
-
|
20
17
|
it 'should have bad rows' do
|
21
18
|
File.open('./spec/support/data.bad', 'r') do |file|
|
22
19
|
expect(file.read).to eq(bad_rows_output)
|
@@ -40,10 +37,7 @@ RSpec.describe CSV2Avro do
|
|
40
37
|
context "Quoted header" do
|
41
38
|
before do
|
42
39
|
ARGV.replace ['./spec/support/data_quoted.csv']
|
43
|
-
|
44
|
-
|
45
|
-
it 'should write errors to STDERR' do
|
46
|
-
expect { converter.convert }.to output("L4: Missing value at name\nL7: Unable to parse\n").to_stderr
|
40
|
+
converter.convert
|
47
41
|
end
|
48
42
|
|
49
43
|
it 'should have a bad row' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv2avro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Ableda
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-11-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -95,6 +95,20 @@ dependencies:
|
|
95
95
|
- - "~>"
|
96
96
|
- !ruby/object:Gem::Version
|
97
97
|
version: '1.7'
|
98
|
+
- !ruby/object:Gem::Dependency
|
99
|
+
name: logr
|
100
|
+
requirement: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - "~>"
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0.1'
|
105
|
+
type: :runtime
|
106
|
+
prerelease: false
|
107
|
+
version_requirements: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - "~>"
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0.1'
|
98
112
|
description: Convert CSV files to Avro like a boss.
|
99
113
|
email:
|
100
114
|
- scotty@secretsaucepartners.com
|