csv2avro 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/bin/csv2avro +2 -0
- data/csv2avro.gemspec +1 -0
- data/lib/csv2avro/converter.rb +19 -4
- data/lib/csv2avro/version.rb +1 -1
- data/lib/csv2avro.rb +24 -5
- data/spec/csv2avro/converter_spec.rb +8 -43
- data/spec/csv2avro_spec.rb +2 -8
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a5d99afbda08e7b21d3045731ad738ae3d5c129
|
4
|
+
data.tar.gz: 9acd1c36181e07032710143a1cf6e0d43d956153
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2bc2091b05c25a1fc4272d5b90386e24ca0e4dd51d0a0c0304a15c0c2db1cc799fcfd30b8100985023fb9d93e61ca1f28fef7911bbac0fc7c98cb57d0ea92f4
|
7
|
+
data.tar.gz: e53e649a5fba8a62e29d60bf84701b0084a360395af377985e5c18dad584d4610101eded0bb90cde16ac7ff6eeadf87fb932c14e939f40f2e98318d93555eee7
|
data/CHANGELOG.md
CHANGED
@@ -3,6 +3,16 @@
|
|
3
3
|
All notable changes to this project are documented in this file.
|
4
4
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
5
5
|
|
6
|
+
## 1.2.0 (2015-11-18) [compare](https://github.com/sspinc/csv2avro/compare/1.1.0...1.2.0))
|
7
|
+
Structured logging and metrics
|
8
|
+
|
9
|
+
### Changed
|
10
|
+
* Log in JSON format using Logr (https://github.com/sspinc/logr)
|
11
|
+
|
12
|
+
### Added
|
13
|
+
* New started_converting and finished_converting events
|
14
|
+
* New lines_processed metric
|
15
|
+
|
6
16
|
## 1.1.0 (2015-09-16) [compare](https://github.com/sspinc/csv2avro/compare/1.0.2...1.1.0))
|
7
17
|
|
8
18
|
### Changed
|
data/bin/csv2avro
CHANGED
@@ -48,10 +48,12 @@ begin
|
|
48
48
|
|
49
49
|
CSV2Avro.new(options).convert
|
50
50
|
rescue OptionParser::MissingArgument => ex
|
51
|
+
CSV2Avro.logger.fatal(ex.message)
|
51
52
|
$stderr.puts ex.message
|
52
53
|
$stderr.puts option_parser
|
53
54
|
exit 2
|
54
55
|
rescue Exception => e
|
56
|
+
CSV2Avro.logger.fatal("processing failed: #{e.message}")
|
55
57
|
$stderr.puts 'Uh oh, something went wrong!'
|
56
58
|
$stderr.puts e.message
|
57
59
|
$stderr.puts e.backtrace.join("\n")
|
data/csv2avro.gemspec
CHANGED
data/lib/csv2avro/converter.rb
CHANGED
@@ -1,14 +1,24 @@
|
|
1
1
|
require 'csv2avro/schema'
|
2
2
|
require 'csv2avro/avro_writer'
|
3
3
|
require 'csv'
|
4
|
+
require 'logr'
|
4
5
|
|
5
6
|
class CSV2Avro
|
6
7
|
class Converter
|
7
|
-
|
8
|
+
|
9
|
+
def self.logger
|
10
|
+
@logger ||= Logr::Logger.new('csv2avro.converter')
|
11
|
+
end
|
12
|
+
|
13
|
+
def logger
|
14
|
+
self.class.logger
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(reader, writer, bad_rows_writer, filename, options, schema: schema)
|
8
18
|
@reader = reader
|
9
19
|
@writer = writer
|
10
20
|
@bad_rows_writer = bad_rows_writer
|
11
|
-
@
|
21
|
+
@filename = filename
|
12
22
|
@options = options
|
13
23
|
@schema = schema
|
14
24
|
|
@@ -22,7 +32,8 @@ class CSV2Avro
|
|
22
32
|
row = csv.shift
|
23
33
|
rescue CSV::MalformedCSVError
|
24
34
|
error_msg = "L#{row_number}: Unable to parse"
|
25
|
-
|
35
|
+
logger.event('parse_error', filename: @filename, line: row_number)
|
36
|
+
.error(error_msg)
|
26
37
|
@bad_rows_writer.puts(error_msg)
|
27
38
|
next
|
28
39
|
end
|
@@ -35,11 +46,15 @@ class CSV2Avro
|
|
35
46
|
@writer.write(hash)
|
36
47
|
rescue CSV2Avro::SchemaValidationError => e
|
37
48
|
error_msg = "L#{row_number}: #{e.errors.join(', ')}"
|
38
|
-
|
49
|
+
e.errors.each do |error|
|
50
|
+
logger.event('schema_violation', filename: @filename, line: row_number, cause: error)
|
51
|
+
.error(error_msg)
|
52
|
+
end
|
39
53
|
@bad_rows_writer.puts(error_msg)
|
40
54
|
end
|
41
55
|
end
|
42
56
|
@writer.flush
|
57
|
+
row_number
|
43
58
|
end
|
44
59
|
|
45
60
|
private
|
data/lib/csv2avro/version.rb
CHANGED
data/lib/csv2avro.rb
CHANGED
@@ -1,9 +1,19 @@
|
|
1
1
|
require 'csv2avro/converter'
|
2
2
|
require 'csv2avro/version'
|
3
3
|
|
4
|
+
require 'logr'
|
5
|
+
|
4
6
|
class CSV2Avro
|
5
7
|
attr_reader :input_path, :schema_path, :bad_rows_path, :stdout_option, :options
|
6
8
|
|
9
|
+
def self.logger
|
10
|
+
@logger ||= Logr::Logger.new('csv2avro')
|
11
|
+
end
|
12
|
+
|
13
|
+
def logger
|
14
|
+
self.class.logger
|
15
|
+
end
|
16
|
+
|
7
17
|
def initialize(options)
|
8
18
|
@input_path = ARGV.first
|
9
19
|
@schema_path = options.delete(:schema)
|
@@ -14,7 +24,16 @@ class CSV2Avro
|
|
14
24
|
end
|
15
25
|
|
16
26
|
def convert
|
17
|
-
|
27
|
+
logger.event('started_converting', filename: input_filename)
|
28
|
+
.monitored("Started converting #{input_filename}", "Started converting #{input_filename}")
|
29
|
+
.info("Started converting #{input_filename}")
|
30
|
+
|
31
|
+
lines = Converter.new(reader, writer, bad_rows_writer, input_filename, options, schema: schema).convert
|
32
|
+
|
33
|
+
logger.event('finished_converting', filename: input_filename)
|
34
|
+
.metric('lines_processed', lines)
|
35
|
+
.monitored("Finished converting #{input_filename}", "Finished converting #{input_filename}, processed #{lines} lines in total.")
|
36
|
+
.info("Finished converting #{input_filename}")
|
18
37
|
ensure
|
19
38
|
writer.close if writer
|
20
39
|
bad_rows_writer.close
|
@@ -45,6 +64,10 @@ class CSV2Avro
|
|
45
64
|
end
|
46
65
|
end
|
47
66
|
|
67
|
+
def input_filename
|
68
|
+
File.basename(input_path)
|
69
|
+
end
|
70
|
+
|
48
71
|
def avro_uri
|
49
72
|
dir = File.dirname(input_path)
|
50
73
|
ext = File.extname(input_path)
|
@@ -53,10 +76,6 @@ class CSV2Avro
|
|
53
76
|
"#{dir}/#{name}.avro"
|
54
77
|
end
|
55
78
|
|
56
|
-
def error_writer
|
57
|
-
$stderr
|
58
|
-
end
|
59
|
-
|
60
79
|
def bad_rows_writer
|
61
80
|
@__bad_rows_writer ||= File.open(bad_rows_uri, 'w')
|
62
81
|
end
|
@@ -6,7 +6,6 @@ RSpec.describe CSV2Avro::Converter do
|
|
6
6
|
let(:writer) { StringIO.new }
|
7
7
|
let(:avro_writer) { CSV2Avro::AvroWriter.new(writer, schema) }
|
8
8
|
let(:bad_rows_writer) { StringIO.new }
|
9
|
-
let(:error_writer) { StringIO.new }
|
10
9
|
|
11
10
|
context 'schema with string and integer columns' do
|
12
11
|
let(:schema_reader) do
|
@@ -35,17 +34,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
35
34
|
end
|
36
35
|
|
37
36
|
before do
|
38
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
37
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
|
39
38
|
end
|
40
39
|
|
41
40
|
it 'should not have any bad rows' do
|
42
41
|
expect(bad_rows_writer.read).to be_empty
|
43
42
|
end
|
44
43
|
|
45
|
-
it 'should not have any errors' do
|
46
|
-
expect(error_writer.read).to be_empty
|
47
|
-
end
|
48
|
-
|
49
44
|
it 'should store the data with the given schema' do
|
50
45
|
expect(AvroReader.new(avro_writer).read).to eq(
|
51
46
|
[
|
@@ -68,17 +63,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
68
63
|
end
|
69
64
|
|
70
65
|
before do
|
71
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
66
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t" }, schema: schema).convert
|
72
67
|
end
|
73
68
|
|
74
69
|
it 'should not have any bad rows' do
|
75
70
|
expect(bad_rows_writer.read).to be_empty
|
76
71
|
end
|
77
72
|
|
78
|
-
it 'should not have any errors' do
|
79
|
-
expect(error_writer.read).to be_empty
|
80
|
-
end
|
81
|
-
|
82
73
|
it 'should store the data with the given schema' do
|
83
74
|
expect(AvroReader.new(writer).read).to eq(
|
84
75
|
[
|
@@ -117,17 +108,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
117
108
|
end
|
118
109
|
|
119
110
|
before do
|
120
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
111
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
|
121
112
|
end
|
122
113
|
|
123
114
|
it 'should not have any bad rows' do
|
124
115
|
expect(bad_rows_writer.read).to be_empty
|
125
116
|
end
|
126
117
|
|
127
|
-
it 'should not have any errors' do
|
128
|
-
expect(error_writer.read).to be_empty
|
129
|
-
end
|
130
|
-
|
131
118
|
it 'should store the data with the given schema' do
|
132
119
|
expect(AvroReader.new(writer).read).to eq(
|
133
120
|
[
|
@@ -150,17 +137,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
150
137
|
end
|
151
138
|
|
152
139
|
before do
|
153
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
140
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
|
154
141
|
end
|
155
142
|
|
156
143
|
it 'should not have any bad rows' do
|
157
144
|
expect(bad_rows_writer.read).to be_empty
|
158
145
|
end
|
159
146
|
|
160
|
-
it 'should not have any errors' do
|
161
|
-
expect(error_writer.read).to be_empty
|
162
|
-
end
|
163
|
-
|
164
147
|
it 'should store the data with the given schema' do
|
165
148
|
expect(AvroReader.new(writer).read).to eq(
|
166
149
|
[
|
@@ -199,17 +182,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
199
182
|
end
|
200
183
|
|
201
184
|
before do
|
202
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
185
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { write_defaults: true }, schema: schema).convert
|
203
186
|
end
|
204
187
|
|
205
188
|
it 'should not have any bad rows' do
|
206
189
|
expect(bad_rows_writer.read).to be_empty
|
207
190
|
end
|
208
191
|
|
209
|
-
it 'should not have any errors' do
|
210
|
-
expect(error_writer.read).to be_empty
|
211
|
-
end
|
212
|
-
|
213
192
|
it 'should store the defaults data' do
|
214
193
|
expect(AvroReader.new(writer).read).to eq(
|
215
194
|
[
|
@@ -245,17 +224,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
245
224
|
end
|
246
225
|
|
247
226
|
before do
|
248
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
227
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
|
249
228
|
end
|
250
229
|
|
251
230
|
it 'should not have any bad rows' do
|
252
231
|
expect(bad_rows_writer.read).to be_empty
|
253
232
|
end
|
254
233
|
|
255
|
-
it 'should not have any errors' do
|
256
|
-
expect(error_writer.read).to be_empty
|
257
|
-
end
|
258
|
-
|
259
234
|
it 'should store the data with the given schema' do
|
260
235
|
expect(AvroReader.new(writer).read).to eq(
|
261
236
|
[
|
@@ -298,17 +273,13 @@ RSpec.describe CSV2Avro::Converter do
|
|
298
273
|
end
|
299
274
|
|
300
275
|
before do
|
301
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
276
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { write_defaults: true }, schema: schema).convert
|
302
277
|
end
|
303
278
|
|
304
279
|
it 'should not have any bad rows' do
|
305
280
|
expect(bad_rows_writer.read).to be_empty
|
306
281
|
end
|
307
282
|
|
308
|
-
it 'should not have any errors' do
|
309
|
-
expect(error_writer.read).to be_empty
|
310
|
-
end
|
311
|
-
|
312
283
|
it 'should store the data with the given schema' do
|
313
284
|
expect(AvroReader.new(writer).read).to eq(
|
314
285
|
[
|
@@ -348,7 +319,7 @@ RSpec.describe CSV2Avro::Converter do
|
|
348
319
|
end
|
349
320
|
|
350
321
|
before do
|
351
|
-
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer,
|
322
|
+
CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t" }, schema: schema).convert
|
352
323
|
end
|
353
324
|
|
354
325
|
it 'should report the bad rows correctly' do
|
@@ -357,12 +328,6 @@ RSpec.describe CSV2Avro::Converter do
|
|
357
328
|
)
|
358
329
|
end
|
359
330
|
|
360
|
-
it 'should have an error' do
|
361
|
-
expect(error_writer.string).to eq(
|
362
|
-
"L2: Missing value at name\nL5: Missing value at name\n"
|
363
|
-
)
|
364
|
-
end
|
365
|
-
|
366
331
|
it 'should store the data with the given schema' do
|
367
332
|
expect(AvroReader.new(writer).read).to eq(
|
368
333
|
[
|
data/spec/csv2avro_spec.rb
CHANGED
@@ -10,13 +10,10 @@ RSpec.describe CSV2Avro do
|
|
10
10
|
context "Unquoted header" do
|
11
11
|
before do
|
12
12
|
ARGV.replace ['./spec/support/data.csv']
|
13
|
+
converter.convert
|
13
14
|
end
|
14
15
|
|
15
16
|
bad_rows_output = "L4: Missing value at name\nL7: Unable to parse\nL9: Missing value at id, Missing value at name\nL10: 'male-shoes' at id doesn't match the type '\"int\"', Missing value at name\n"
|
16
|
-
it 'should write errors to STDERR' do
|
17
|
-
expect { converter.convert }.to output(bad_rows_output).to_stderr
|
18
|
-
end
|
19
|
-
|
20
17
|
it 'should have bad rows' do
|
21
18
|
File.open('./spec/support/data.bad', 'r') do |file|
|
22
19
|
expect(file.read).to eq(bad_rows_output)
|
@@ -40,10 +37,7 @@ RSpec.describe CSV2Avro do
|
|
40
37
|
context "Quoted header" do
|
41
38
|
before do
|
42
39
|
ARGV.replace ['./spec/support/data_quoted.csv']
|
43
|
-
|
44
|
-
|
45
|
-
it 'should write errors to STDERR' do
|
46
|
-
expect { converter.convert }.to output("L4: Missing value at name\nL7: Unable to parse\n").to_stderr
|
40
|
+
converter.convert
|
47
41
|
end
|
48
42
|
|
49
43
|
it 'should have a bad row' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv2avro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Ableda
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-11-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -95,6 +95,20 @@ dependencies:
|
|
95
95
|
- - "~>"
|
96
96
|
- !ruby/object:Gem::Version
|
97
97
|
version: '1.7'
|
98
|
+
- !ruby/object:Gem::Dependency
|
99
|
+
name: logr
|
100
|
+
requirement: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - "~>"
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0.1'
|
105
|
+
type: :runtime
|
106
|
+
prerelease: false
|
107
|
+
version_requirements: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - "~>"
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0.1'
|
98
112
|
description: Convert CSV files to Avro like a boss.
|
99
113
|
email:
|
100
114
|
- scotty@secretsaucepartners.com
|