csv2avro 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.dockerignore +1 -0
- data/.gitignore +15 -0
- data/.travis.yml +8 -0
- data/CHANGELOG.md +48 -0
- data/Dockerfile +23 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +80 -0
- data/Rakefile +41 -0
- data/bin/csv2avro +58 -0
- data/csv2avro.gemspec +28 -0
- data/lib/avro_schema.rb +57 -0
- data/lib/csv2avro/avro_writer.rb +27 -0
- data/lib/csv2avro/converter.rb +125 -0
- data/lib/csv2avro/schema.rb +44 -0
- data/lib/csv2avro/version.rb +3 -0
- data/lib/csv2avro.rb +78 -0
- data/spec/csv2avro/converter_spec.rb +434 -0
- data/spec/csv2avro/schema_spec.rb +85 -0
- data/spec/csv2avro_spec.rb +38 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/support/avro_reader.rb +22 -0
- data/spec/support/data.csv +4 -0
- data/spec/support/schema.avsc +17 -0
- metadata +161 -0
@@ -0,0 +1,434 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for CSV2Avro::Converter#convert. Each context feeds delimited text and
# an Avro schema through the converter and inspects its three outputs: the
# Avro records, the rejected rows and the error messages.
RSpec.describe CSV2Avro::Converter do
  # Builds an in-memory input stream from an array of rows.
  #
  # rows        - Array of Arrays, header row first.
  # csv_options - keyword options forwarded to CSV.generate (e.g. col_sep).
  #               Passed as keywords because a positional options hash is
  #               taken as the output string by newer CSV versions.
  #
  # Returns a StringIO positioned at the start of the generated text.
  def csv_io(rows, **csv_options)
    StringIO.new(CSV.generate(**csv_options) { |csv| rows.each { |row| csv << row } })
  end

  describe '#convert' do
    # Collaborators shared by every context; a context only supplies
    # `schema_io`, `reader` and, when it needs non-default behaviour, `options`.
    let(:schema) { CSV2Avro::Schema.new(schema_io) }
    let(:writer) { CSV2Avro::AvroWriter.new(StringIO.new, schema) }
    let(:bad_rows_writer) { StringIO.new }
    let(:error_writer) { StringIO.new }
    let(:options) { {} }

    before do
      CSV2Avro::Converter.new(reader, writer, bad_rows_writer, error_writer, options, schema: schema).convert
    end

    shared_examples 'a conversion without rejected rows' do
      # StringIO#string returns everything written so far. #read would start
      # at the current position (the end, after writes) and always yield "",
      # so it could never detect unexpected output.
      # NOTE(review): if the converter emits the header row to bad_rows_writer
      # unconditionally, this expectation has to allow for it - confirm
      # against Converter#convert.
      it 'should not have any bad rows' do
        expect(bad_rows_writer.string).to eq("")
      end

      it 'should not have any errors' do
        expect(error_writer.string).to eq("")
      end
    end

    context 'schema with string and integer columns' do
      let(:schema_io) do
        StringIO.new(
          {
            name: 'categories',
            type: 'record',
            fields: [
              { name: 'id', type: 'int' },
              { name: 'name', type: 'string' },
              { name: 'description', type: ['string', 'null'] }
            ]
          }.to_json
        )
      end

      let(:rows) do
        [
          %w[id name description],
          %w[1 dresses Dresses],
          %w[2 female-tops]
        ]
      end

      let(:expected_records) do
        [
          { 'id' => 1, 'name' => 'dresses', 'description' => 'Dresses' },
          { 'id' => 2, 'name' => 'female-tops', 'description' => nil }
        ]
      end

      context 'separated with commas (csv)' do
        let(:reader) { csv_io(rows) }

        include_examples 'a conversion without rejected rows'

        it 'should store the data with the given schema' do
          expect(AvroReader.new(writer).read).to eq(expected_records)
        end
      end

      context 'separated with tabs (tsv)' do
        let(:reader) { csv_io(rows, col_sep: "\t") }
        let(:options) { { delimiter: "\t" } }

        include_examples 'a conversion without rejected rows'

        it 'should store the data with the given schema' do
          expect(AvroReader.new(writer).read).to eq(expected_records)
        end
      end
    end

    context 'schema with boolean and array columns' do
      let(:schema_io) do
        StringIO.new(
          {
            name: 'categories',
            type: 'record',
            fields: [
              { name: 'id', type: 'int' },
              { name: 'enabled', type: ['boolean', 'null'] },
              { name: 'image_links', type: [{ type: 'array', items: 'string' }, 'null'] }
            ]
          }.to_json
        )
      end

      let(:expected_records) do
        [
          { 'id' => 1, 'enabled' => true, 'image_links' => ['http://www.images.com/dresses.jpeg'] },
          { 'id' => 2, 'enabled' => false, 'image_links' => ['http://www.images.com/bras1.jpeg', 'http://www.images.com/bras2.jpeg'] }
        ]
      end

      # "commas" here refers to the separator inside the array column; the
      # columns themselves are tab-separated.
      context 'separated with commas (default)' do
        let(:reader) do
          csv_io(
            [
              %w[id enabled image_links],
              %w[1 true http://www.images.com/dresses.jpeg],
              %w[2 false http://www.images.com/bras1.jpeg,http://www.images.com/bras2.jpeg]
            ],
            col_sep: "\t"
          )
        end
        let(:options) { { delimiter: "\t" } }

        include_examples 'a conversion without rejected rows'

        it 'should store the data with the given schema' do
          expect(AvroReader.new(writer).read).to eq(expected_records)
        end
      end

      context 'separated with semicolons' do
        let(:reader) do
          csv_io(
            [
              %w[id enabled image_links],
              %w[1 true http://www.images.com/dresses.jpeg],
              %w[2 false http://www.images.com/bras1.jpeg;http://www.images.com/bras2.jpeg]
            ],
            col_sep: "\t"
          )
        end
        let(:options) { { delimiter: "\t", array_delimiter: ';' } }

        include_examples 'a conversion without rejected rows'

        it 'should store the data with the given schema' do
          expect(AvroReader.new(writer).read).to eq(expected_records)
        end
      end
    end

    context 'schema with default values' do
      let(:schema_io) do
        StringIO.new(
          {
            name: 'product',
            type: 'record',
            fields: [
              { name: 'id', type: 'int' },
              { name: 'category', type: 'string', default: 'unknown' },
              { name: 'size_type', type: 'string', default: 'regular' },
              { name: 'enabled', type: ['boolean', 'null'], default: false }
            ]
          }.to_json
        )
      end

      # The second data row carries only an id, and no row has a size_type
      # column at all.
      let(:reader) do
        csv_io(
          [
            %w[id category enabled],
            %w[1 dresses true],
            %w[2]
          ]
        )
      end
      let(:options) { { write_defaults: true } }

      include_examples 'a conversion without rejected rows'

      it 'should store the defaults data' do
        expect(AvroReader.new(writer).read).to eq(
          [
            { 'id' => 1, 'category' => 'dresses', 'size_type' => 'regular', 'enabled' => true },
            { 'id' => 2, 'category' => 'unknown', 'size_type' => 'regular', 'enabled' => false }
          ]
        )
      end
    end

    context 'schema with aliased fields' do
      let(:reader) do
        csv_io(
          [
            %w[id color_id],
            %w[1 1_red],
            %w[2 2_blue]
          ]
        )
      end

      let(:schema_io) do
        StringIO.new(
          {
            name: 'product',
            type: 'record',
            fields: [
              { name: 'id', type: 'int' },
              { name: 'look_id', type: 'string', aliases: ['color_id', 'photo_group_id'] }
            ]
          }.to_json
        )
      end

      include_examples 'a conversion without rejected rows'

      it 'should store the data with the given schema' do
        expect(AvroReader.new(writer).read).to eq(
          [
            { 'id' => 1, 'look_id' => '1_red' },
            { 'id' => 2, 'look_id' => '2_blue' }
          ]
        )
      end
    end

    context 'schema with enum column' do
      let(:schema_io) do
        StringIO.new(
          {
            name: 'product',
            type: 'record',
            fields: [
              { name: 'id', type: 'int' },
              {
                name: 'size_type',
                type: [
                  {
                    type: 'enum',
                    name: 'size_type_values',
                    symbols: ['regular', 'petite', 'plus', 'tall', 'big_and_tall', 'maternity']
                  },
                  'null'
                ],
                default: 'regular'
              }
            ]
          }.to_json
        )
      end

      # The second row spells the enum value with spaces; the expectation
      # below shows it stored as the 'big_and_tall' symbol.
      let(:reader) do
        csv_io(
          [
            %w[id size_type],
            %w[1 regular],
            ['2', 'big and tall'],
            %w[3]
          ]
        )
      end
      let(:options) { { write_defaults: true } }

      include_examples 'a conversion without rejected rows'

      it 'should store the data with the given schema' do
        expect(AvroReader.new(writer).read).to eq(
          [
            { 'id' => 1, 'size_type' => 'regular' },
            { 'id' => 2, 'size_type' => 'big_and_tall' },
            { 'id' => 3, 'size_type' => 'regular' }
          ]
        )
      end
    end

    context 'data with bad rows' do
      let(:schema_io) do
        StringIO.new(
          {
            name: 'categories',
            type: 'record',
            fields: [
              { name: 'id', type: 'int' },
              { name: 'name', type: 'string', aliases: ['title'] },
              { name: 'description', type: ['string', 'null'] }
            ]
          }.to_json
        )
      end

      # Rows 1 and 4 leave the required `title` (alias of `name`) column empty.
      let(:reader) do
        csv_io(
          [
            %w[id title description],
            ['1', nil, 'dresses'],
            %w[2 female-tops],
            %w[3 female-bottoms],
            ['4', nil, 'female-shoes']
          ],
          col_sep: "\t"
        )
      end
      let(:options) { { delimiter: "\t" } }

      it 'should have the bad data in the original form' do
        expect(bad_rows_writer.string).to eq(
          "id\ttitle\tdescription\n1\t\tdresses\n4\t\tfemale-shoes\n"
        )
      end

      it 'should have an error' do
        expect(error_writer.string).to eq(
          "line 2: Missing value at name\nline 5: Missing value at name\n"
        )
      end

      it 'should store the data with the given schema' do
        expect(AvroReader.new(writer).read).to eq(
          [
            { 'id' => 2, 'name' => 'female-tops', 'description' => nil },
            { 'id' => 3, 'name' => 'female-bottoms', 'description' => nil }
          ]
        )
      end
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the lookup hashes CSV2Avro::Schema derives from an Avro schema
# document: #defaults, #types and #aliases.
RSpec.describe CSV2Avro::Schema do
  # Every context below supplies its own `schema_io`.
  subject(:schema) { CSV2Avro::Schema.new(schema_io) }

  describe '#defaults' do
    context 'schema with default values' do
      let(:schema_io) do
        StringIO.new(
          {
            name: 'product',
            type: 'record',
            fields: [
              { name: 'id', type: 'int' },
              { name: 'category', type: 'string', default: 'unknown' },
              { name: 'enabled', type: ['boolean', 'null'], default: false }
            ]
          }.to_json
        )
      end

      # Fields without a default (here `id`) are absent from the hash.
      it 'should return a hash with the field - default value pairs' do
        expect(schema.defaults).to eq({ 'category' => 'unknown', 'enabled' => false })
      end
    end
  end

  describe '#types' do
    context 'schema with different types' do
      let(:schema_io) do
        StringIO.new(
          {
            name: 'product',
            type: 'record',
            fields: [
              { name: 'id', type: 'int' },
              { name: 'category', type: 'string' },
              { name: 'reviews', type: { type: 'array', items: 'string' } },
              { name: 'enabled', type: ['boolean', 'null'] },
              {
                name: 'availability',
                type: {
                  type: 'enum',
                  name: 'availability_values',
                  symbols: ['in_stock', 'out_of_stock', 'preorder']
                },
                default: 'in_stock'
              }
            ]
          }.to_json
        )
      end

      # Union and complex types are reported by their primary type symbol.
      it 'should return a hash with the field - type pairs' do
        expect(schema.types).to eq(
          { 'id' => :int, 'category' => :string, 'reviews' => :array, 'enabled' => :boolean, 'availability' => :enum }
        )
      end
    end
  end

  describe '#aliases' do
    context 'schema with aliases' do
      let(:schema_io) do
        StringIO.new(
          {
            name: 'product',
            type: 'record',
            fields: [
              { name: 'id', type: 'int' },
              { name: 'look_id', type: 'string', aliases: ['color_id', 'photo_group_id'] }
            ]
          }.to_json
        )
      end

      # Each alias maps back to the field name it stands for.
      it 'should return a hash with the alias - name mapping' do
        expect(schema.aliases).to eq({ 'color_id' => 'look_id', 'photo_group_id' => 'look_id' })
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# End-to-end check of CSV2Avro#convert against the fixture files under
# ./spec/support.
#
# NOTE(review): only the first example calls #convert; the other two read
# data.bad.csv and data.avro, which presumably are produced by that call.
# The examples therefore look order-dependent - confirm before enabling
# random ordering.
RSpec.describe CSV2Avro do
  describe '#convert' do
    let(:options) { { schema: './spec/support/schema.avsc' } }

    subject(:converter) do
      # The input path is supplied through ARGV rather than as an argument.
      ARGV.replace ['./spec/support/data.csv']

      CSV2Avro.new(options)
    end

    it 'should write the problems to STDERR' do
      expected_message = "line 4: Missing value at name\n"

      expect { converter.convert }.to output(expected_message).to_stderr
    end

    it 'should have a bad row' do
      rejected = File.open('./spec/support/data.bad.csv', 'r', &:read)

      expect(rejected).to eq("id,name,description\n3,,Bras\n")
    end

    it 'should contain the avro data' do
      expected_records = [
        { 'id' => 1, 'name' => 'dresses', 'description' => 'Dresses' },
        { 'id' => 2, 'name' => 'female-tops', 'description' => nil }
      ]

      File.open('./spec/support/data.avro', 'r') do |avro_file|
        expect(AvroReader.new(avro_file).read).to eq(expected_records)
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'csv2avro'
|
2
|
+
require 'csv2avro/converter'
|
3
|
+
require 'csv2avro/avro_writer'
|
4
|
+
|
5
|
+
require 'json'
|
6
|
+
|
7
|
+
# Load every Ruby file under this directory's support/ tree. The pattern is
# anchored on this file's own directory and the matches are sorted, so the
# load does not depend on how the helper was required or on filesystem order.
Dir[File.join(__dir__, 'support', '**', '*.rb')].sort.each { |helper| require helper }

RSpec.configure do |config|
  # Remove the files matching these globs (generated *.avro and *.bad*
  # artefacts in ./spec/support) once a group's examples have finished.
  config.after(:all) do
    Dir['./spec/support/*.avro', './spec/support/*.bad*'].each do |generated|
      File.delete(generated)
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Spec helper that reads every record back out of an Avro data stream so
# examples can compare the result against plain Ruby hashes.
class AvroReader
  attr_reader :io

  # io     - the stream handed to Avro::DataFile::Reader by #read.
  # schema - optional schema source; when given it is parsed with
  #          Avro::Schema.parse and passed as the second argument to the
  #          datum reader. Without it a default datum reader is used.
  def initialize(io, schema = nil)
    @io = io
    @reader = if schema
                Avro::IO::DatumReader.new(nil, Avro::Schema.parse(schema))
              else
                Avro::IO::DatumReader.new
              end
  end

  # Returns an Array with every record the data file reader yields, in order.
  def read
    # The data file reader is enumerable, so no manual accumulation is needed.
    Avro::DataFile::Reader.new(io, @reader).to_a
  end
end
|