csv2avro 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6e7e9a8d86d5cd8e85b957ffecb5e60ccfe9c8b5
4
- data.tar.gz: 2b12a6828c601dfe19e6d93bc39b481ec1e15118
3
+ metadata.gz: 8a5d99afbda08e7b21d3045731ad738ae3d5c129
4
+ data.tar.gz: 9acd1c36181e07032710143a1cf6e0d43d956153
5
5
  SHA512:
6
- metadata.gz: 1caef810f21aa9f9b8dd1562c253a967f5b1c94382296f677dd6703624ac51d3d13774457c76820bf52c4a88795829d28a1b46017384ab306abf9f62bb50a078
7
- data.tar.gz: cf9f67c9316d2840f883a36082a30187ae8aebbe24db71a4100496ea833b20568b9ec16f4e92258b38705b7334f07b78b52766db78245cc81c2a32e3c6244d95
6
+ metadata.gz: f2bc2091b05c25a1fc4272d5b90386e24ca0e4dd51d0a0c0304a15c0c2db1cc799fcfd30b8100985023fb9d93e61ca1f28fef7911bbac0fc7c98cb57d0ea92f4
7
+ data.tar.gz: e53e649a5fba8a62e29d60bf84701b0084a360395af377985e5c18dad584d4610101eded0bb90cde16ac7ff6eeadf87fb932c14e939f40f2e98318d93555eee7
data/CHANGELOG.md CHANGED
@@ -3,6 +3,16 @@
3
3
  All notable changes to this project are documented in this file.
4
4
  This project adheres to [Semantic Versioning](http://semver.org/).
5
5
 
6
+ ## 1.2.0 (2015-11-18) [compare](https://github.com/sspinc/csv2avro/compare/1.1.0...1.2.0)
7
+ Structured logging and metrics
8
+
9
+ ### Changed
10
+ * Log in JSON format using Logr (https://github.com/sspinc/logr)
11
+
12
+ ### Added
13
+ * New started_converting and finished_converting events
14
+ * New lines_processed metric
15
+
6
16
  ## 1.1.0 (2015-09-16) [compare](https://github.com/sspinc/csv2avro/compare/1.0.2...1.1.0)
7
17
 
8
18
  ### Changed
data/bin/csv2avro CHANGED
@@ -48,10 +48,12 @@ begin
48
48
 
49
49
  CSV2Avro.new(options).convert
50
50
  rescue OptionParser::MissingArgument => ex
51
+ CSV2Avro.logger.fatal(ex.message)
51
52
  $stderr.puts ex.message
52
53
  $stderr.puts option_parser
53
54
  exit 2
54
55
  rescue Exception => e
56
+ CSV2Avro.logger.fatal("processing failed: #{e.message}")
55
57
  $stderr.puts 'Uh oh, something went wrong!'
56
58
  $stderr.puts e.message
57
59
  $stderr.puts e.backtrace.join("\n")
data/csv2avro.gemspec CHANGED
@@ -25,4 +25,5 @@ Gem::Specification.new do |spec|
25
25
  spec.add_development_dependency "bump", "~> 0.5"
26
26
 
27
27
  spec.add_runtime_dependency "avro", "~> 1.7"
28
+ spec.add_runtime_dependency "logr", "~> 0.1"
28
29
  end
@@ -1,14 +1,24 @@
1
1
  require 'csv2avro/schema'
2
2
  require 'csv2avro/avro_writer'
3
3
  require 'csv'
4
+ require 'logr'
4
5
 
5
6
  class CSV2Avro
6
7
  class Converter
7
- def initialize(reader, writer, bad_rows_writer, error_writer, options, schema: schema)
8
+
9
+ def self.logger
10
+ @logger ||= Logr::Logger.new('csv2avro.converter')
11
+ end
12
+
13
+ def logger
14
+ self.class.logger
15
+ end
16
+
17
+ def initialize(reader, writer, bad_rows_writer, filename, options, schema: schema)
8
18
  @reader = reader
9
19
  @writer = writer
10
20
  @bad_rows_writer = bad_rows_writer
11
- @error_writer = error_writer
21
+ @filename = filename
12
22
  @options = options
13
23
  @schema = schema
14
24
 
@@ -22,7 +32,8 @@ class CSV2Avro
22
32
  row = csv.shift
23
33
  rescue CSV::MalformedCSVError
24
34
  error_msg = "L#{row_number}: Unable to parse"
25
- @error_writer.puts(error_msg)
35
+ logger.event('parse_error', filename: @filename, line: row_number)
36
+ .error(error_msg)
26
37
  @bad_rows_writer.puts(error_msg)
27
38
  next
28
39
  end
@@ -35,11 +46,15 @@ class CSV2Avro
35
46
  @writer.write(hash)
36
47
  rescue CSV2Avro::SchemaValidationError => e
37
48
  error_msg = "L#{row_number}: #{e.errors.join(', ')}"
38
- @error_writer.puts(error_msg)
49
+ e.errors.each do |error|
50
+ logger.event('schema_violation', filename: @filename, line: row_number, cause: error)
51
+ .error(error_msg)
52
+ end
39
53
  @bad_rows_writer.puts(error_msg)
40
54
  end
41
55
  end
42
56
  @writer.flush
57
+ row_number
43
58
  end
44
59
 
45
60
  private
@@ -1,3 +1,3 @@
1
1
  class CSV2Avro
2
- VERSION = "1.1.0"
2
+ VERSION = "1.2.0"
3
3
  end
data/lib/csv2avro.rb CHANGED
@@ -1,9 +1,19 @@
1
1
  require 'csv2avro/converter'
2
2
  require 'csv2avro/version'
3
3
 
4
+ require 'logr'
5
+
4
6
  class CSV2Avro
5
7
  attr_reader :input_path, :schema_path, :bad_rows_path, :stdout_option, :options
6
8
 
9
+ def self.logger
10
+ @logger ||= Logr::Logger.new('csv2avro')
11
+ end
12
+
13
+ def logger
14
+ self.class.logger
15
+ end
16
+
7
17
  def initialize(options)
8
18
  @input_path = ARGV.first
9
19
  @schema_path = options.delete(:schema)
@@ -14,7 +24,16 @@ class CSV2Avro
14
24
  end
15
25
 
16
26
  def convert
17
- Converter.new(reader, writer, bad_rows_writer, error_writer, options, schema: schema).convert
27
+ logger.event('started_converting', filename: input_filename)
28
+ .monitored("Started converting #{input_filename}", "Started converting #{input_filename}")
29
+ .info("Started converting #{input_filename}")
30
+
31
+ lines = Converter.new(reader, writer, bad_rows_writer, input_filename, options, schema: schema).convert
32
+
33
+ logger.event('finished_converting', filename: input_filename)
34
+ .metric('lines_processed', lines)
35
+ .monitored("Finished converting #{input_filename}", "Finished converting #{input_filename}, processed #{lines} lines in total.")
36
+ .info("Finished converting #{input_filename}")
18
37
  ensure
19
38
  writer.close if writer
20
39
  bad_rows_writer.close
@@ -45,6 +64,10 @@ class CSV2Avro
45
64
  end
46
65
  end
47
66
 
67
+ def input_filename
68
+ File.basename(input_path)
69
+ end
70
+
48
71
  def avro_uri
49
72
  dir = File.dirname(input_path)
50
73
  ext = File.extname(input_path)
@@ -53,10 +76,6 @@ class CSV2Avro
53
76
  "#{dir}/#{name}.avro"
54
77
  end
55
78
 
56
- def error_writer
57
- $stderr
58
- end
59
-
60
79
  def bad_rows_writer
61
80
  @__bad_rows_writer ||= File.open(bad_rows_uri, 'w')
62
81
  end
@@ -6,7 +6,6 @@ RSpec.describe CSV2Avro::Converter do
6
6
  let(:writer) { StringIO.new }
7
7
  let(:avro_writer) { CSV2Avro::AvroWriter.new(writer, schema) }
8
8
  let(:bad_rows_writer) { StringIO.new }
9
- let(:error_writer) { StringIO.new }
10
9
 
11
10
  context 'schema with string and integer columns' do
12
11
  let(:schema_reader) do
@@ -35,17 +34,13 @@ RSpec.describe CSV2Avro::Converter do
35
34
  end
36
35
 
37
36
  before do
38
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
37
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
39
38
  end
40
39
 
41
40
  it 'should not have any bad rows' do
42
41
  expect(bad_rows_writer.read).to be_empty
43
42
  end
44
43
 
45
- it 'should not have any errors' do
46
- expect(error_writer.read).to be_empty
47
- end
48
-
49
44
  it 'should store the data with the given schema' do
50
45
  expect(AvroReader.new(avro_writer).read).to eq(
51
46
  [
@@ -68,17 +63,13 @@ RSpec.describe CSV2Avro::Converter do
68
63
  end
69
64
 
70
65
  before do
71
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
66
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t" }, schema: schema).convert
72
67
  end
73
68
 
74
69
  it 'should not have any bad rows' do
75
70
  expect(bad_rows_writer.read).to be_empty
76
71
  end
77
72
 
78
- it 'should not have any errors' do
79
- expect(error_writer.read).to be_empty
80
- end
81
-
82
73
  it 'should store the data with the given schema' do
83
74
  expect(AvroReader.new(writer).read).to eq(
84
75
  [
@@ -117,17 +108,13 @@ RSpec.describe CSV2Avro::Converter do
117
108
  end
118
109
 
119
110
  before do
120
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
111
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
121
112
  end
122
113
 
123
114
  it 'should not have any bad rows' do
124
115
  expect(bad_rows_writer.read).to be_empty
125
116
  end
126
117
 
127
- it 'should not have any errors' do
128
- expect(error_writer.read).to be_empty
129
- end
130
-
131
118
  it 'should store the data with the given schema' do
132
119
  expect(AvroReader.new(writer).read).to eq(
133
120
  [
@@ -150,17 +137,13 @@ RSpec.describe CSV2Avro::Converter do
150
137
  end
151
138
 
152
139
  before do
153
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
140
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t", array_delimiter: ';' }, schema: schema).convert
154
141
  end
155
142
 
156
143
  it 'should not have any bad rows' do
157
144
  expect(bad_rows_writer.read).to be_empty
158
145
  end
159
146
 
160
- it 'should not have any errors' do
161
- expect(error_writer.read).to be_empty
162
- end
163
-
164
147
  it 'should store the data with the given schema' do
165
148
  expect(AvroReader.new(writer).read).to eq(
166
149
  [
@@ -199,17 +182,13 @@ RSpec.describe CSV2Avro::Converter do
199
182
  end
200
183
 
201
184
  before do
202
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
185
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { write_defaults: true }, schema: schema).convert
203
186
  end
204
187
 
205
188
  it 'should not have any bad rows' do
206
189
  expect(bad_rows_writer.read).to be_empty
207
190
  end
208
191
 
209
- it 'should not have any errors' do
210
- expect(error_writer.read).to be_empty
211
- end
212
-
213
192
  it 'should store the defaults data' do
214
193
  expect(AvroReader.new(writer).read).to eq(
215
194
  [
@@ -245,17 +224,13 @@ RSpec.describe CSV2Avro::Converter do
245
224
  end
246
225
 
247
226
  before do
248
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, {}, schema: schema).convert
227
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', {}, schema: schema).convert
249
228
  end
250
229
 
251
230
  it 'should not have any bad rows' do
252
231
  expect(bad_rows_writer.read).to be_empty
253
232
  end
254
233
 
255
- it 'should not have any errors' do
256
- expect(error_writer.read).to be_empty
257
- end
258
-
259
234
  it 'should store the data with the given schema' do
260
235
  expect(AvroReader.new(writer).read).to eq(
261
236
  [
@@ -298,17 +273,13 @@ RSpec.describe CSV2Avro::Converter do
298
273
  end
299
274
 
300
275
  before do
301
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { write_defaults: true }, schema: schema).convert
276
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { write_defaults: true }, schema: schema).convert
302
277
  end
303
278
 
304
279
  it 'should not have any bad rows' do
305
280
  expect(bad_rows_writer.read).to be_empty
306
281
  end
307
282
 
308
- it 'should not have any errors' do
309
- expect(error_writer.read).to be_empty
310
- end
311
-
312
283
  it 'should store the data with the given schema' do
313
284
  expect(AvroReader.new(writer).read).to eq(
314
285
  [
@@ -348,7 +319,7 @@ RSpec.describe CSV2Avro::Converter do
348
319
  end
349
320
 
350
321
  before do
351
- CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, error_writer, { delimiter: "\t" }, schema: schema).convert
322
+ CSV2Avro::Converter.new(reader, avro_writer, bad_rows_writer, 'data.csv', { delimiter: "\t" }, schema: schema).convert
352
323
  end
353
324
 
354
325
  it 'should report the bad rows correctly' do
@@ -357,12 +328,6 @@ RSpec.describe CSV2Avro::Converter do
357
328
  )
358
329
  end
359
330
 
360
- it 'should have an error' do
361
- expect(error_writer.string).to eq(
362
- "L2: Missing value at name\nL5: Missing value at name\n"
363
- )
364
- end
365
-
366
331
  it 'should store the data with the given schema' do
367
332
  expect(AvroReader.new(writer).read).to eq(
368
333
  [
@@ -10,13 +10,10 @@ RSpec.describe CSV2Avro do
10
10
  context "Unquoted header" do
11
11
  before do
12
12
  ARGV.replace ['./spec/support/data.csv']
13
+ converter.convert
13
14
  end
14
15
 
15
16
  bad_rows_output = "L4: Missing value at name\nL7: Unable to parse\nL9: Missing value at id, Missing value at name\nL10: 'male-shoes' at id doesn't match the type '\"int\"', Missing value at name\n"
16
- it 'should write errors to STDERR' do
17
- expect { converter.convert }.to output(bad_rows_output).to_stderr
18
- end
19
-
20
17
  it 'should have bad rows' do
21
18
  File.open('./spec/support/data.bad', 'r') do |file|
22
19
  expect(file.read).to eq(bad_rows_output)
@@ -40,10 +37,7 @@ RSpec.describe CSV2Avro do
40
37
  context "Quoted header" do
41
38
  before do
42
39
  ARGV.replace ['./spec/support/data_quoted.csv']
43
- end
44
-
45
- it 'should write errors to STDERR' do
46
- expect { converter.convert }.to output("L4: Missing value at name\nL7: Unable to parse\n").to_stderr
40
+ converter.convert
47
41
  end
48
42
 
49
43
  it 'should have a bad row' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv2avro
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Ableda
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-09-16 00:00:00.000000000 Z
12
+ date: 2015-11-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -95,6 +95,20 @@ dependencies:
95
95
  - - "~>"
96
96
  - !ruby/object:Gem::Version
97
97
  version: '1.7'
98
+ - !ruby/object:Gem::Dependency
99
+ name: logr
100
+ requirement: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - "~>"
103
+ - !ruby/object:Gem::Version
104
+ version: '0.1'
105
+ type: :runtime
106
+ prerelease: false
107
+ version_requirements: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - "~>"
110
+ - !ruby/object:Gem::Version
111
+ version: '0.1'
98
112
  description: Convert CSV files to Avro like a boss.
99
113
  email:
100
114
  - scotty@secretsaucepartners.com