avro 1.9.2 → 1.11.0

Sign up to get free protection for your applications and to get access to all the features.
data/test/test_io.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  # Licensed to the Apache Software Foundation (ASF) under one
2
3
  # or more contributor license agreements. See the NOTICE file
3
4
  # distributed with this work for additional information
@@ -90,7 +91,10 @@ EOS
90
91
  "name": "Test",
91
92
  "fields": [{"name": "ts",
92
93
  "type": {"type": "long",
93
- "logicalType": "timestamp-micros"}}]}
94
+ "logicalType": "timestamp-micros"}},
95
+ {"name": "ts2",
96
+ "type": {"type": "long",
97
+ "logicalType": "timestamp-millis"}}]}
94
98
  EOS
95
99
  check(record_schema)
96
100
  end
@@ -112,6 +116,13 @@ EOS
112
116
  check_default(enum_schema, '"B"', "B")
113
117
  end
114
118
 
119
+ def test_enum_with_default
120
+ enum_schema = '{"type": "enum", "name": "Test", "symbols": ["A", "B"], "default": "A"}'
121
+ check(enum_schema)
122
+ # Field default is used for missing field.
123
+ check_default(enum_schema, '"B"', "B")
124
+ end
125
+
115
126
  def test_recursive
116
127
  recursive_schema = <<EOS
117
128
  {"type": "record",
@@ -158,6 +169,17 @@ EOS
158
169
  check_default(fixed_schema, '"a"', "a")
159
170
  end
160
171
 
172
+ def test_record_variable_key_types
173
+ datum = { sym: "foo", "str"=>"bar"}
174
+ ret_val = { "sym"=> "foo", "str"=>"bar"}
175
+ schema = Schema.parse('{"type":"record", "name":"rec", "fields":[{"name":"sym", "type":"string"}, {"name":"str", "type":"string"}]}')
176
+
177
+ writer, _encoder, _datum_writer = write_datum(datum, schema)
178
+
179
+ ret_datum = read_datum(writer, schema)
180
+ assert_equal ret_datum, ret_val
181
+ end
182
+
161
183
  def test_record_with_nil
162
184
  schema = Avro::Schema.parse('{"type":"record", "name":"rec", "fields":[{"type":"int", "name":"i"}]}')
163
185
  assert_raise(Avro::IO::AvroTypeError) do
@@ -196,7 +218,7 @@ EOS
196
218
  [64, '80 01'],
197
219
  [8192, '80 80 01'],
198
220
  [-8193, '81 80 01'],
199
- ]
221
+ ].freeze
200
222
 
201
223
  def avro_hexlify(reader)
202
224
  bytes = []
@@ -245,46 +267,46 @@ EOS
245
267
 
246
268
  def test_utf8_string_encoding
247
269
  [
248
- "\xC3".force_encoding('ISO-8859-1'),
249
- "\xC3\x83".force_encoding('UTF-8')
270
+ String.new("\xC3", encoding: 'ISO-8859-1'),
271
+ String.new("\xC3\x83", encoding: 'UTF-8')
250
272
  ].each do |value|
251
- output = ''.force_encoding('BINARY')
273
+ output = String.new('', encoding: 'BINARY')
252
274
  encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
253
275
  datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"string"'))
254
276
  datum_writer.write(value, encoder)
255
277
 
256
- assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
278
+ assert_equal String.new("\x04\xc3\x83", encoding: 'BINARY'), output
257
279
  end
258
280
  end
259
281
 
260
282
  def test_bytes_encoding
261
283
  [
262
- "\xC3\x83".force_encoding('BINARY'),
263
- "\xC3\x83".force_encoding('ISO-8859-1'),
264
- "\xC3\x83".force_encoding('UTF-8')
284
+ String.new("\xC3\x83", encoding: 'BINARY'),
285
+ String.new("\xC3\x83", encoding: 'ISO-8859-1'),
286
+ String.new("\xC3\x83", encoding: 'UTF-8')
265
287
  ].each do |value|
266
- output = ''.force_encoding('BINARY')
288
+ output = String.new('', encoding: 'BINARY')
267
289
  encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
268
290
  datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"bytes"'))
269
291
  datum_writer.write(value, encoder)
270
292
 
271
- assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
293
+ assert_equal String.new("\x04\xc3\x83", encoding: 'BINARY'), output
272
294
  end
273
295
  end
274
296
 
275
297
  def test_fixed_encoding
276
298
  [
277
- "\xC3\x83".force_encoding('BINARY'),
278
- "\xC3\x83".force_encoding('ISO-8859-1'),
279
- "\xC3\x83".force_encoding('UTF-8')
299
+ String.new("\xC3\x83", encoding: 'BINARY'),
300
+ String.new("\xC3\x83", encoding: 'ISO-8859-1'),
301
+ String.new("\xC3\x83", encoding: 'UTF-8')
280
302
  ].each do |value|
281
- output = ''.force_encoding('BINARY')
303
+ output = String.new('', encoding: 'BINARY')
282
304
  encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
283
305
  schema = '{"type": "fixed", "name": "TwoBytes", "size": 2}'
284
306
  datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse(schema))
285
307
  datum_writer.write(value, encoder)
286
308
 
287
- assert_equal "\xc3\x83".force_encoding('BINARY'), output
309
+ assert_equal String.new("\xC3\x83", encoding: 'BINARY'), output
288
310
  end
289
311
  end
290
312
 
@@ -390,6 +412,50 @@ EOS
390
412
  assert_equal(incorrect, 0)
391
413
  end
392
414
 
415
+ def test_unknown_enum_symbol
416
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
417
+ {
418
+ "type": "enum",
419
+ "name": "test",
420
+ "symbols": ["B", "C"]
421
+ }
422
+ SCHEMA
423
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
424
+ {
425
+ "type": "enum",
426
+ "name": "test",
427
+ "symbols": ["A", "B"]
428
+ }
429
+ SCHEMA
430
+ datum_to_write = "C"
431
+ writer, * = write_datum(datum_to_write, writers_schema)
432
+ datum_read = read_datum(writer, writers_schema, readers_schema)
433
+ # Ruby implementation did not follow the spec and returns the writer's symbol here
434
+ assert_equal(datum_read, datum_to_write)
435
+ end
436
+
437
+ def test_unknown_enum_symbol_with_enum_default
438
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
439
+ {
440
+ "type": "enum",
441
+ "name": "test",
442
+ "symbols": ["B", "C"]
443
+ }
444
+ SCHEMA
445
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
446
+ {
447
+ "type": "enum",
448
+ "name": "test",
449
+ "symbols": ["A", "B", "UNKNOWN"],
450
+ "default": "UNKNOWN"
451
+ }
452
+ SCHEMA
453
+ datum_to_write = "C"
454
+ writer, * = write_datum(datum_to_write, writers_schema)
455
+ datum_read = read_datum(writer, writers_schema, readers_schema)
456
+ assert_equal(datum_read, "UNKNOWN")
457
+ end
458
+
393
459
  def test_array_schema_promotion
394
460
  writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
395
461
  readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
@@ -408,6 +474,36 @@ EOS
408
474
  assert_equal(datum_read, datum_to_write)
409
475
  end
410
476
 
477
+ def test_aliased
478
+ writers_schema = Avro::Schema.parse(<<-SCHEMA)
479
+ {"type":"record", "name":"Rec1", "fields":[
480
+ {"name":"field1", "type":"int"}
481
+ ]}
482
+ SCHEMA
483
+ readers_schema = Avro::Schema.parse(<<-SCHEMA)
484
+ {"type":"record", "name":"Rec2", "aliases":["Rec1"], "fields":[
485
+ {"name":"field2", "aliases":["field1"], "type":"int"}
486
+ ]}
487
+ SCHEMA
488
+ writer, * = write_datum({ 'field1' => 1 }, writers_schema)
489
+ datum_read = read_datum(writer, writers_schema, readers_schema)
490
+ assert_equal(datum_read, { 'field2' => 1 })
491
+ end
492
+
493
+ def test_big_decimal_datum_for_float
494
+ writers_schema = Avro::Schema.parse('"float"')
495
+ writer, * = write_datum(BigDecimal('1.2'), writers_schema)
496
+ datum_read = read_datum(writer, writers_schema)
497
+ assert_in_delta(1.2, datum_read)
498
+ end
499
+
500
+ def test_big_decimal_datum_for_double
501
+ writers_schema = Avro::Schema.parse('"double"')
502
+ writer, * = write_datum(BigDecimal("1.2"), writers_schema)
503
+ datum_read = read_datum(writer, writers_schema)
504
+ assert_in_delta(1.2, datum_read)
505
+ end
506
+
411
507
  def test_snappy_backward_compat
412
508
  # a snappy-compressed block payload without the checksum
413
509
  # this has no back-references, just one literal so the last 9
@@ -486,7 +582,7 @@ EOS
486
582
  datum = randomdata.next
487
583
  assert validate(schm, datum), 'datum is not valid for schema'
488
584
  w = Avro::IO::DatumWriter.new(schm)
489
- writer = StringIO.new "", "w"
585
+ writer = StringIO.new(+"", "w")
490
586
  w.write(datum, Avro::IO::BinaryEncoder.new(writer))
491
587
  r = datum_reader(schm)
492
588
  reader = StringIO.new(writer.string)
@@ -1,4 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
+ # frozen_string_literal: true
2
3
  # Licensed to the Apache Software Foundation (ASF) under one
3
4
  # or more contributor license agreements. See the NOTICE file
4
5
  # distributed with this work for additional information
@@ -16,6 +17,7 @@
16
17
  # limitations under the License.
17
18
 
18
19
  require 'test_help'
20
+ require 'memory_profiler'
19
21
 
20
22
  class TestLogicalTypes < Test::Unit::TestCase
21
23
  def test_int_date
@@ -98,8 +100,143 @@ class TestLogicalTypes < Test::Unit::TestCase
98
100
  assert_equal 'duration', schema.logical_type
99
101
  end
100
102
 
103
+ def test_bytes_decimal
104
+ schema = Avro::Schema.parse <<-SCHEMA
105
+ { "type": "bytes", "logicalType": "decimal", "precision": 9, "scale": 6 }
106
+ SCHEMA
107
+
108
+ assert_equal 'decimal', schema.logical_type
109
+ assert_equal 9, schema.precision
110
+ assert_equal 6, schema.scale
111
+
112
+ assert_encode_and_decode BigDecimal('-3.4562'), schema
113
+ assert_encode_and_decode BigDecimal('3.4562'), schema
114
+ assert_encode_and_decode 15.123, schema
115
+ assert_encode_and_decode 15, schema
116
+ assert_encode_and_decode BigDecimal('0.123456'), schema
117
+ assert_encode_and_decode BigDecimal('0'), schema
118
+ assert_encode_and_decode BigDecimal('1'), schema
119
+ assert_encode_and_decode BigDecimal('-1'), schema
120
+
121
+ assert_raise ArgumentError do
122
+ type = Avro::LogicalTypes::BytesDecimal.new(schema)
123
+ type.encode('1.23')
124
+ end
125
+ end
126
+
127
+ def test_bytes_decimal_range_errors
128
+ schema = Avro::Schema.parse <<-SCHEMA
129
+ { "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 }
130
+ SCHEMA
131
+
132
+ type = Avro::LogicalTypes::BytesDecimal.new(schema)
133
+
134
+ assert_raises RangeError do
135
+ type.encode(BigDecimal('345'))
136
+ end
137
+
138
+ assert_raises RangeError do
139
+ type.encode(BigDecimal('1.5342'))
140
+ end
141
+
142
+ assert_raises RangeError do
143
+ type.encode(BigDecimal('-1.5342'))
144
+ end
145
+
146
+ assert_raises RangeError do
147
+ type.encode(BigDecimal('-100.2'))
148
+ end
149
+
150
+ assert_raises RangeError do
151
+ type.encode(BigDecimal('-99.991'))
152
+ end
153
+ end
154
+
155
+ def test_bytes_decimal_conversion
156
+ schema = Avro::Schema.parse <<-SCHEMA
157
+ { "type": "bytes", "logicalType": "decimal", "precision": 12, "scale": 6 }
158
+ SCHEMA
159
+
160
+ type = Avro::LogicalTypes::BytesDecimal.new(schema)
161
+
162
+ enc = "\xcb\x43\x38".dup.force_encoding('BINARY')
163
+ assert_equal enc, type.encode(BigDecimal('-3.4562'))
164
+ assert_equal BigDecimal('-3.4562'), type.decode(enc)
165
+
166
+ assert_equal "\x34\xbc\xc8".dup.force_encoding('BINARY'), type.encode(BigDecimal('3.4562'))
167
+ assert_equal BigDecimal('3.4562'), type.decode("\x34\xbc\xc8".dup.force_encoding('BINARY'))
168
+
169
+ assert_equal "\x6a\x33\x0e\x87\x00".dup.force_encoding('BINARY'), type.encode(BigDecimal('456123.123456'))
170
+ assert_equal BigDecimal('456123.123456'), type.decode("\x6a\x33\x0e\x87\x00".dup.force_encoding('BINARY'))
171
+ end
172
+
173
+ def test_logical_type_with_schema
174
+ exception = assert_raises(ArgumentError) do
175
+ Avro::LogicalTypes::LogicalTypeWithSchema.new(nil)
176
+ end
177
+ assert_equal exception.to_s, 'schema is required'
178
+
179
+ schema = Avro::Schema.parse <<-SCHEMA
180
+ { "type": "bytes", "logicalType": "decimal", "precision": 12, "scale": 6 }
181
+ SCHEMA
182
+
183
+ assert_nothing_raised do
184
+ Avro::LogicalTypes::LogicalTypeWithSchema.new(schema)
185
+ end
186
+
187
+ assert_raises NotImplementedError do
188
+ Avro::LogicalTypes::LogicalTypeWithSchema.new(schema).encode(BigDecimal('2'))
189
+ end
190
+
191
+ assert_raises NotImplementedError do
192
+ Avro::LogicalTypes::LogicalTypeWithSchema.new(schema).decode('foo')
193
+ end
194
+ end
195
+
196
+ def test_bytes_decimal_object_allocations_encode
197
+ schema = Avro::Schema.parse <<-SCHEMA
198
+ { "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 }
199
+ SCHEMA
200
+
201
+ type = Avro::LogicalTypes::BytesDecimal.new(schema)
202
+
203
+ positive_value = BigDecimal('5.2')
204
+ negative_value = BigDecimal('-5.2')
205
+
206
+ [positive_value, negative_value].each do |value|
207
+ report = MemoryProfiler.report do
208
+ type.encode(value)
209
+ end
210
+
211
+ assert_equal 5, report.total_allocated
212
+ # Ruby 2.7 does not retain anything. Ruby 2.6 retains 1
213
+ assert_operator 1, :>=, report.total_retained
214
+ end
215
+ end
216
+
217
+ def test_bytes_decimal_object_allocations_decode
218
+ schema = Avro::Schema.parse <<-SCHEMA
219
+ { "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 }
220
+ SCHEMA
221
+
222
+ type = Avro::LogicalTypes::BytesDecimal.new(schema)
223
+
224
+ positive_enc = "\x02\b".dup.force_encoding('BINARY')
225
+ negative_enc = "\xFD\xF8".dup.force_encoding('BINARY')
226
+
227
+ [positive_enc, negative_enc].each do |encoded|
228
+ report = MemoryProfiler.report do
229
+ type.decode(encoded)
230
+ end
231
+
232
+ assert_equal 5, report.total_allocated
233
+ # Ruby 2.7 does not retain anything. Ruby 2.6 retains 1
234
+ assert_operator 1, :>=, report.total_retained
235
+ end
236
+ end
237
+
101
238
  def encode(datum, schema)
102
- buffer = StringIO.new("")
239
+ buffer = StringIO.new
103
240
  encoder = Avro::IO::BinaryEncoder.new(buffer)
104
241
 
105
242
  datum_writer = Avro::IO::DatumWriter.new(schema)
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  # Licensed to the Apache Software Foundation (ASF) under one
2
3
  # or more contributor license agreements. See the NOTICE file
3
4
  # distributed with this work for additional information
@@ -184,7 +185,7 @@ EOS
184
185
  }
185
186
  }
186
187
  EOS
187
- ]
188
+ ].freeze
188
189
 
189
190
  Protocol = Avro::Protocol
190
191
  def test_parse