csv2avro 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6e7e9a8d86d5cd8e85b957ffecb5e60ccfe9c8b5
4
- data.tar.gz: 2b12a6828c601dfe19e6d93bc39b481ec1e15118
3
+ metadata.gz: 8a5d99afbda08e7b21d3045731ad738ae3d5c129
4
+ data.tar.gz: 9acd1c36181e07032710143a1cf6e0d43d956153
5
5
  SHA512:
6
- metadata.gz: 1caef810f21aa9f9b8dd1562c253a967f5b1c94382296f677dd6703624ac51d3d13774457c76820bf52c4a88795829d28a1b46017384ab306abf9f62bb50a078
7
- data.tar.gz: cf9f67c9316d2840f883a36082a30187ae8aebbe24db71a4100496ea833b20568b9ec16f4e92258b38705b7334f07b78b52766db78245cc81c2a32e3c6244d95
6
+ metadata.gz: f2bc2091b05c25a1fc4272d5b90386e24ca0e4dd51d0a0c0304a15c0c2db1cc799fcfd30b8100985023fb9d93e61ca1f28fef7911bbac0fc7c98cb57d0ea92f4
7
+ data.tar.gz: e53e649a5fba8a62e29d60bf84701b0084a360395af377985e5c18dad584d4610101eded0bb90cde16ac7ff6eeadf87fb932c14e939f40f2e98318d93555eee7
data/CHANGELOG.md CHANGED
@@ -3,6 +3,16 @@
3
3
  All notable changes to this project are documented in this file.
4
4
  This project adheres to [Semantic Versioning](http://semver.org/).
5
5
 
6
+ ## 1.2.0 (2015-11-18) [compare](https://github.com/sspinc/csv2avro/compare/1.1.0...1.2.0)
7
+ Structured logging and metrics
8
+
9
+ ### Changed
10
+ * Log in JSON format using Logr (https://github.com/sspinc/logr)
11
+
12
+ ### Added
13
+ * New started_converting and finished_converting events
14
+ * New lines_processed metric
15
+
6
16
  ## 1.1.0 (2015-09-16) [compare](https://github.com/sspinc/csv2avro/compare/1.0.2...1.1.0)
7
17
 
8
18
  ### Changed
data/bin/csv2avro CHANGED
@@ -48,10 +48,12 @@ begin
48
48
 
49
49
  CSV2Avro.new(options).convert
50
50
  rescue OptionParser::MissingArgument => ex
51
+ CSV2Avro.logger.fatal(ex.message)
51
52
  $stderr.puts ex.message
52
53
  $stderr.puts option_parser
53
54
  exit 2
54
55
  rescue Exception => e
56
+ CSV2Avro.logger.fatal("processing failed: #{e.message}")
55
57
  $stderr.puts 'Uh oh, something went wrong!'
56
58
  $stderr.puts e.message
57
59
  $stderr.puts e.backtrace.join("\n")
data/csv2avro.gemspec CHANGED
@@ -25,4 +25,5 @@ Gem::Specification.new do |spec|
25
25
  spec.add_development_dependency "bump", "~> 0.5"
26
26
 
27
27
  spec.add_runtime_dependency "avro", "~> 1.7"
28
+ spec.add_runtime_dependency "logr", "~> 0.1"
28
29
  end
@@ -1,14 +1,24 @@
1
1
  require 'csv2avro/schema'
2
2
  require 'csv2avro/avro_writer'
3
3
  require 'csv'
4
+ require 'logr'
4
5
 
5
6
  class CSV2Avro
6
7
  class Converter
7
- def initialize(reader, writer, bad_rows_writer, error_writer, options, schema: schema)
8
+
9
+ def self.logger
10
+ @logger ||= Logr::Logger.new('csv2avro.converter')
11
+ end
12
+
13
+ def logger
14
+ self.class.logger
15
+ end
16
+
17
+ def initialize(reader, writer, bad_rows_writer, filename, options, schema: schema)
8
18
  @reader = reader
9
19
  @writer = writer
10
20
  @bad_rows_writer = bad_rows_writer
11
- @error_writer = error_writer
21
+ @filename = filename
12
22
  @options = options
13
23
  @schema = schema
14
24
 
@@ -22,7 +32,8 @@ class CSV2Avro
22
32
  row = csv.shift
23
33
  rescue CSV::MalformedCSVError
24
34
  error_msg = "L#{row_number}: Unable to parse"
25
- @error_writer.puts(error_msg)
35
+ logger.event('parse_error', filename: @filename, line: row_number)
36
+ .error(error_msg)
26
37
  @bad_rows_writer.puts(error_msg)
27
38
  next
28
39
  end
@@ -35,11 +46,15 @@ class CSV2Avro
35
46
  @writer.write(hash)
36
47
  rescue CSV2Avro::SchemaValidationError => e
37
48
  error_msg = "L#{row_number}: #{e.errors.join(', ')}"
38
- @error_writer.puts(error_msg)
49
+ e.errors.each do |error|
50
+ logger.event('schema_violation', filename: @filename, line: row_number, cause: error)
51
+ .error(error_msg)
52
+ end
39
53
  @bad_rows_writer.puts(error_msg)
40
54
  end
41
55
  end
42
56
  @writer.flush
57
+ row_number
43
58
  end
44
59
 
45
60
  private
@@ -1,3 +1,3 @@
1
1
  class CSV2Avro
2
- VERSION = "1.1.0"
2
+ VERSION = "1.2.0"
3
3
  end
data/lib/csv2avro.rb CHANGED
@@ -1,9 +1,19 @@
1
1
  require 'csv2avro/converter'
2
2
  require 'csv2avro/version'
3
3
 
4
+ require 'logr'
5
+
4
6
  class CSV2Avro
5
7
  attr_reader :input_path, :schema_path, :bad_rows_path, :stdout_option, :options
6
8
 
9
+ def self.logger
10
+ @logger ||= Logr::Logger.new('csv2avro')
11
+ end
12
+
13
+ def logger
14
+ self.class.logger
15
+ end
16
+
7
17
  def initialize(options)
8
18
  @input_path = ARGV.first
9
19
  @schema_path = options.delete(:schema)
@@ -14,7 +24,16 @@ class CSV2Avro
14
24
  end
15
25
 
16
26
  def convert
17
- Converter.new(reader, writer, bad_rows_writer, error_writer, options, schema: schema).convert
27
+ logger.event('started_converting', filename: input_filename)
28
+ .monitored("Started converting #{input_filename}", "Started converting #{input_filename}")
29
+ .info("Started converting #{input_filename}")
30
+
31
+ lines = Converter.new(reader, writer, bad_rows_writer, input_filename, options, schema: schema).convert
32
+
33
+ logger.event('finished_converting', filename: input_filename)
34
+ .metric('lines_processed', lines)
35
+ .monitored("Finished converting #{input_filename}", "Finished converting #{input_filename}, processed #{lines} lines in total.")
36
+ .info("Finished converting #{input_filename}")
18
37
  ensure
19
38
  writer.close if writer
20
39
  bad_rows_writer.close
@@ -45,6 +64,10 @@ class CSV2Avro
45
64
  end
46
65
  end
47
66
 
67
+ def input_filename
68
+ File.basename(input_path)
69
+ end
70
+
48
71
  def avro_uri
49
72
  dir = File.dirname(input_path)
50
73
  ext = File.extname(input_path)
@@ -53,10 +76,6 @@ class CSV2Avro
53
76
  "#{dir}/#{name}.avro"
54
77
  end
55
78
 
56
- def error_writer
57
- $stderr
58
- end
59
-
60
79
  def bad_rows_writer
61
80
  @__bad_rows_writer ||= File.open(bad_rows_uri, 'w')
62
81
  end
@@ -6,7 +6,6 @@ RSpec.describe CSV2Avro::Converter do
6
6
  let(:writer) { StringIO.new }
7
7
  let(:avro_writer) { CSV2Avro::AvroWriter.new(writer, schema) }
8
8
  let(:bad_rows_writer) { StringIO.new }
9
- let(:error_writer) { StringIO.new }
10
9
 
11
10
  context 'schema with string and integer columns' do
12
11
  let(:schema_reader) do
@@ -35,17 +34,13 @@ RSpec.describe CSV2Avro::Converter do
35
34
  end
36
35
 
37
36
  before do
38
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
37
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
39
38
  end
40
39
 
41
40
  it 'should not have any bad rows' do
42
41
  expect(bad_rows_writer.read).to be_empty
43
42
  end
44
43
 
45
- it 'should not have any errors' do
46
- expect(error_writer.read).to be_empty
47
- end
48
-
49
44
  it 'should store the data with the given schema' do
50
45
  expect(AvroReader.new(avro_writer).read).to eq(
51
46
  [
@@ -68,17 +63,13 @@ RSpec.describe CSV2Avro::Converter do
68
63
  end
69
64
 
70
65
  before do
71
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
66
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t" }, schema: schema).convert
72
67
  end
73
68
 
74
69
  it 'should not have any bad rows' do
75
70
  expect(bad_rows_writer.read).to be_empty
76
71
  end
77
72
 
78
- it 'should not have any errors' do
79
- expect(error_writer.read).to be_empty
80
- end
81
-
82
73
  it 'should store the data with the given schema' do
83
74
  expect(AvroReader.new(writer).read).to eq(
84
75
  [
@@ -117,17 +108,13 @@ RSpec.describe CSV2Avro::Converter do
117
108
  end
118
109
 
119
110
  before do
120
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
111
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
121
112
  end
122
113
 
123
114
  it 'should not have any bad rows' do
124
115
  expect(bad_rows_writer.read).to be_empty
125
116
  end
126
117
 
127
- it 'should not have any errors' do
128
- expect(error_writer.read).to be_empty
129
- end
130
-
131
118
  it 'should store the data with the given schema' do
132
119
  expect(AvroReader.new(writer).read).to eq(
133
120
  [
@@ -150,17 +137,13 @@ RSpec.describe CSV2Avro::Converter do
150
137
  end
151
138
 
152
139
  before do
153
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
140
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
154
141
  end
155
142
 
156
143
  it 'should not have any bad rows' do
157
144
  expect(bad_rows_writer.read).to be_empty
158
145
  end
159
146
 
160
- it 'should not have any errors' do
161
- expect(error_writer.read).to be_empty
162
- end
163
-
164
147
  it 'should store the data with the given schema' do
165
148
  expect(AvroReader.new(writer).read).to eq(
166
149
  [
@@ -199,17 +182,13 @@ RSpec.describe CSV2Avro::Converter do
199
182
  end
200
183
 
201
184
  before do
202
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
185
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { write_defaults: true }, schema: schema).convert
203
186
  end
204
187
 
205
188
  it 'should not have any bad rows' do
206
189
  expect(bad_rows_writer.read).to be_empty
207
190
  end
208
191
 
209
- it 'should not have any errors' do
210
- expect(error_writer.read).to be_empty
211
- end
212
-
213
192
  it 'should store the defaults data' do
214
193
  expect(AvroReader.new(writer).read).to eq(
215
194
  [
@@ -245,17 +224,13 @@ RSpec.describe CSV2Avro::Converter do
245
224
  end
246
225
 
247
226
  before do
248
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
227
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
249
228
  end
250
229
 
251
230
  it 'should not have any bad rows' do
252
231
  expect(bad_rows_writer.read).to be_empty
253
232
  end
254
233
 
255
- it 'should not have any errors' do
256
- expect(error_writer.read).to be_empty
257
- end
258
-
259
234
  it 'should store the data with the given schema' do
260
235
  expect(AvroReader.new(writer).read).to eq(
261
236
  [
@@ -298,17 +273,13 @@ RSpec.describe CSV2Avro::Converter do
298
273
  end
299
274
 
300
275
  before do
301
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
276
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { write_defaults: true }, schema: schema).convert
302
277
  end
303
278
 
304
279
  it 'should not have any bad rows' do
305
280
  expect(bad_rows_writer.read).to be_empty
306
281
  end
307
282
 
308
- it 'should not have any errors' do
309
- expect(error_writer.read).to be_empty
310
- end
311
-
312
283
  it 'should store the data with the given schema' do
313
284
  expect(AvroReader.new(writer).read).to eq(
314
285
  [
@@ -348,7 +319,7 @@ RSpec.describe CSV2Avro::Converter do
348
319
  end
349
320
 
350
321
  before do
351
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
322
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t" }, schema: schema).convert
352
323
  end
353
324
 
354
325
  it 'should report the bad rows correctly' do
@@ -357,12 +328,6 @@ RSpec.describe CSV2Avro::Converter do
357
328
  )
358
329
  end
359
330
 
360
- it 'should have an error' do
361
- expect(error_writer.string).to eq(
362
- "L2: Missing value at name\nL5: Missing value at name\n"
363
- )
364
- end
365
-
366
331
  it 'should store the data with the given schema' do
367
332
  expect(AvroReader.new(writer).read).to eq(
368
333
  [
@@ -10,13 +10,10 @@ RSpec.describe CSV2Avro do
10
10
  context "Unquoted header" do
11
11
  before do
12
12
  ARGV.replace ['./spec/support/data.csv']
13
+ converter.convert
13
14
  end
14
15
 
15
16
  bad_rows_output = "L4: Missing value at name\nL7: Unable to parse\nL9: Missing value at id, Missing value at name\nL10: 'male-shoes' at id doesn't match the type '\"int\"', Missing value at name\n"
16
- it 'should write errors to STDERR' do
17
- expect { converter.convert }.to output(bad_rows_output).to_stderr
18
- end
19
-
20
17
  it 'should have bad rows' do
21
18
  File.open('./spec/support/data.bad', 'r') do |file|
22
19
  expect(file.read).to eq(bad_rows_output)
@@ -40,10 +37,7 @@ RSpec.describe CSV2Avro do
40
37
  context "Quoted header" do
41
38
  before do
42
39
  ARGV.replace ['./spec/support/data_quoted.csv']
43
- end
44
-
45
- it 'should write errors to STDERR' do
46
- expect { converter.convert }.to output("L4: Missing value at name\nL7: Unable to parse\n").to_stderr
40
+ converter.convert
47
41
  end
48
42
 
49
43
  it 'should have a bad row' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv2avro
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Ableda
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-09-16 00:00:00.000000000 Z
12
+ date: 2015-11-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -95,6 +95,20 @@ dependencies:
95
95
  - - "~>"
96
96
  - !ruby/object:Gem::Version
97
97
  version: '1.7'
98
+ - !ruby/object:Gem::Dependency
99
+ name: logr
100
+ requirement: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - "~>"
103
+ - !ruby/object:Gem::Version
104
+ version: '0.1'
105
+ type: :runtime
106
+ prerelease: false
107
+ version_requirements: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - "~>"
110
+ - !ruby/object:Gem::Version
111
+ version: '0.1'
98
112
  description: Convert CSV files to Avro like a boss.
99
113
  email:
100
114
  - scotty@secretsaucepartners.com