avro 1.8.1 → 1.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,543 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
# Exercises Avro::SchemaCompatibility.can_read? against a catalogue of
# reader/writer schema pairs.
#
# Conventions used throughout this class:
# * The pair tables list schemas as `reader, writer`, whereas the
#   `can_read?` helper takes `(writer, reader)`. The tables swap the order
#   when they call the helper.
# * Every `*_schema` helper parses a fresh schema on each call, so no schema
#   object is shared between assertions.
class TestSchemaCompatibility < Test::Unit::TestCase

  # Every primitive type must be readable by an identical primitive type.
  def test_primitive_schema_compatibility
    Avro::Schema::PRIMITIVE_TYPES.each do |schema_type|
      assert_true(
        can_read?(send("#{schema_type}_schema"), send("#{schema_type}_schema")),
        "expecting #{schema_type} to read #{schema_type}"
      )
    end
  end

  # Each consecutive pair below is (reader, writer); the reader is expected
  # to be able to read data written with the writer schema.
  def test_compatible_reader_writer_pairs
    [
      long_schema, int_schema,
      float_schema, int_schema,
      float_schema, long_schema,
      double_schema, long_schema,
      double_schema, int_schema,
      double_schema, float_schema,

      int_array_schema, int_array_schema,
      long_array_schema, int_array_schema,
      int_map_schema, int_map_schema,
      long_map_schema, int_map_schema,

      enum1_ab_schema, enum1_ab_schema,
      enum1_ab_aliased_schema, enum1_ab_schema,
      enum1_abc_schema, enum1_ab_schema,
      enum1_ab_default_schema, enum1_abc_schema,

      fixed1_schema, fixed1_schema,
      fixed1_aliased_schema, fixed1_schema,

      string_schema, bytes_schema,
      bytes_schema, string_schema,

      empty_union_schema, empty_union_schema,
      int_union_schema, int_union_schema,
      int_string_union_schema, string_int_union_schema,
      int_union_schema, empty_union_schema,
      long_union_schema, int_union_schema,

      int_union_schema, int_schema,
      int_schema, int_union_schema,

      empty_record1_schema, empty_record1_schema,
      empty_record1_schema, a_int_record1_schema,
      empty_record1_aliased_schema, empty_record1_schema,

      a_int_record1_schema, a_int_record1_schema,
      a_dint_record1_schema, a_int_record1_schema,
      a_dint_record1_schema, a_dint_record1_schema,
      a_int_record1_schema, a_dint_record1_schema,

      a_long_record1_schema, a_int_record1_schema,

      a_int_record1_schema, a_int_b_int_record1_schema,
      a_dint_record1_schema, a_int_b_int_record1_schema,

      a_int_b_dint_record1_schema, a_int_record1_schema,
      a_dint_b_dint_record1_schema, empty_record1_schema,
      a_dint_b_dint_record1_schema, a_int_record1_schema,
      a_int_b_int_record1_schema, a_dint_b_dint_record1_schema,

      int_list_record_schema, int_list_record_schema,
      long_list_record_schema, long_list_record_schema,
      long_list_record_schema, int_list_record_schema,

      null_schema, null_schema,

      nested_optional_record, nested_record
    ].each_slice(2) do |(reader, writer)|
      assert_true(can_read?(writer, reader), "expecting #{reader} to read #{writer}")
    end
  end

  # Stand-alone check that an int-only union reader cannot read an
  # int/string union writer. The same pair also appears in
  # test_incompatible_reader_writer_pairs.
  def test_broken
    assert_false(can_read?(int_string_union_schema, int_union_schema))
  end

  # Each consecutive pair below is (reader, writer); the reader is expected
  # NOT to be able to read data written with the writer schema.
  def test_incompatible_reader_writer_pairs
    [
      null_schema, int_schema,
      null_schema, long_schema,

      boolean_schema, int_schema,

      int_schema, null_schema,
      int_schema, boolean_schema,
      int_schema, long_schema,
      int_schema, float_schema,
      int_schema, double_schema,

      long_schema, float_schema,
      long_schema, double_schema,

      float_schema, double_schema,

      string_schema, boolean_schema,
      string_schema, int_schema,

      bytes_schema, null_schema,
      bytes_schema, int_schema,

      int_array_schema, long_array_schema,
      int_map_schema, int_array_schema,
      int_array_schema, int_map_schema,
      int_map_schema, long_map_schema,

      enum1_ab_schema, enum1_abc_schema,
      enum1_ab_schema, enum1_ab_aliased_schema,
      enum1_bc_schema, enum1_abc_schema,

      enum1_ab_schema, enum2_ab_schema,
      int_schema, enum2_ab_schema,
      enum2_ab_schema, int_schema,

      fixed1_schema, fixed2_schema,
      fixed1_schema, fixed1_size3_schema,
      fixed1_schema, fixed1_aliased_schema,

      int_union_schema, int_string_union_schema,
      string_union_schema, int_string_union_schema,

      empty_record2_schema, empty_record1_schema,
      empty_record1_schema, empty_record1_aliased_schema,
      a_int_record1_schema, empty_record1_schema,
      a_int_b_dint_record1_schema, empty_record1_schema,

      int_list_record_schema, long_list_record_schema,

      nested_record, nested_optional_record
    ].each_slice(2) do |(reader, writer)|
      assert_false(can_read?(writer, reader), "expecting #{reader} not to read #{writer}")
    end
  end

  # Baseline two-field record used as the writer schema by the record
  # evolution tests below.
  def writer_schema
    Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"oldfield2", "type":"string"}
      ]}
    SCHEMA
  end

  # Reader drops the writer's second field.
  def test_missing_field
    reader_schema = Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # Reader drops the writer's first field.
  def test_missing_second_field
    reader_schema = Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield2", "type":"string"}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # Reader renames a field and lists the writer's name as an alias.
  def test_aliased_field
    reader_schema = Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Record", "fields":[
        {"name":"newname1", "aliases":["oldfield1"], "type":"int"},
        {"name":"oldfield2", "type":"string"}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # Reader and writer declare exactly the same fields.
  def test_all_fields
    reader_schema = Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"oldfield2", "type":"string"}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_true(can_read?(reader_schema, writer_schema))
  end

  # Reader adds a field that carries a default value.
  def test_new_field_with_default
    reader_schema = Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"newfield1", "type":"int", "default":42}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # Reader adds a field without a default value.
  def test_new_field
    reader_schema = Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"newfield1", "type":"int"}
      ]}
    SCHEMA
    assert_false(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # A string array writer is readable by a string array, not by a string map.
  def test_array_writer_schema
    valid_reader = string_array_schema
    invalid_reader = string_map_schema

    assert_true(can_read?(string_array_schema, valid_reader))
    assert_false(can_read?(string_array_schema, invalid_reader))
  end

  # A string writer is readable by a string reader; an int writer is not.
  def test_primitive_writer_schema
    valid_reader = string_schema
    assert_true(can_read?(string_schema, valid_reader))
    assert_false(can_read?(int_schema, string_schema))
  end

  def test_union_reader_writer_subset_incompatiblity
    # reader union schema must contain all writer union branches
    union_writer = union_schema(int_schema, string_schema)
    union_reader = union_schema(string_schema)

    assert_false(can_read?(union_writer, union_reader))
    assert_true(can_read?(union_reader, union_writer))
  end

  # Same record name and field name, but the field types differ.
  def test_incompatible_record_field
    # Locals are deliberately not called string_schema/int_schema so they do
    # not shadow the primitive schema helper methods of the same name.
    string_field_record = Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [
        {"name":"field1", "type":"string"}
      ]}
    SCHEMA
    int_field_record = Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [
        {"name":"field1", "type":"int"}
      ]}
    SCHEMA
    assert_false(can_read?(string_field_record, int_field_record))
  end

  # A reader enum may have more symbols than the writer, but not fewer.
  def test_enum_symbols
    enum_schema1 = Avro::Schema.parse(<<-SCHEMA)
      {"type":"enum", "name":"MyEnum", "symbols":["A","B"]}
    SCHEMA
    enum_schema2 = Avro::Schema.parse(<<-SCHEMA)
      {"type":"enum", "name":"MyEnum", "symbols":["A","B","C"]}
    SCHEMA
    assert_false(can_read?(enum_schema2, enum_schema1))
    assert_true(can_read?(enum_schema1, enum_schema2))
  end

  # Reader fields alias each other's writer names while swapping types.
  def test_crossed_aliases
    writer_schema = Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Record", "fields":[
        {"name":"field1", "type": "int"},
        {"name":"field2", "type": "string"}
      ]}
    SCHEMA
    reader_schema = Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Record", "fields":[
        {"name":"field1", "aliases":["field2"], "type":"string"},
        {"name":"field2", "aliases":["field1"], "type":"int"}
      ]}
    SCHEMA
    # Not supported: per the original note, an alias is not consulted when
    # the field name already matches directly.
    assert_false(can_read?(writer_schema, reader_schema))
  end

  # Tests from lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java

  # Record "Point2D" with double fields x and y.
  def point_2d_schema
    Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Point2D", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"}
      ]}
    SCHEMA
  end

  # Record "Point" in namespace "written" with double fields x and y.
  def point_2d_fullname_schema
    Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Point", "namespace":"written", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"}
      ]}
    SCHEMA
  end

  # Record "Point" with double fields x, y and z; z has no default.
  def point_3d_no_default_schema
    Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Point", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"},
        {"name":"z", "type":"double"}
      ]}
    SCHEMA
  end

  # Record "Point3D" with double fields x, y and z; z defaults to 0.0.
  def point_3d_schema
    Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Point3D", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"},
        {"name":"z", "type":"double", "default": 0.0}
      ]}
    SCHEMA
  end

  # Record "Point" with double fields x, y and z; z defaults to 0.0.
  def point_3d_match_name_schema
    Avro::Schema.parse(<<-SCHEMA)
      {"type":"record", "name":"Point", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"},
        {"name":"z", "type":"double", "default": 0.0}
      ]}
    SCHEMA
  end

  def test_union_resolution_no_structure_match
    # short name match, but no structure match
    read_schema = union_schema(null_schema, point_3d_no_default_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_first_structure_match_2d
    # multiple structure matches with no name matches
    read_schema = union_schema(null_schema, point_3d_no_default_schema, point_2d_schema, point_3d_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_first_structure_match_3d
    # multiple structure matches with no name matches
    read_schema = union_schema(null_schema, point_3d_no_default_schema, point_3d_schema, point_2d_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_named_structure_match
    # multiple structure matches with a short name match
    read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_full_name_match
    # there is a full name match that should be chosen
    read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema, point_2d_fullname_schema)
    assert_true(can_read?(point_2d_fullname_schema, read_schema))
  end

  # Thin wrapper around Avro::SchemaCompatibility.can_read?.
  # Note the argument order: writer schema first, reader schema second.
  def can_read?(writer, reader)
    Avro::SchemaCompatibility.can_read?(writer, reader)
  end

  # Builds a union schema from the given branch schemas by joining their
  # string forms into a JSON array. With no arguments this parses "[]".
  def union_schema(*schemas)
    Avro::Schema.parse("[#{schemas.map(&:to_s).join(',')}]")
  end

  # Defines one "<type>_schema" helper per entry of
  # Avro::Schema::PRIMITIVE_TYPES, each parsing the quoted type name.
  Avro::Schema::PRIMITIVE_TYPES.each do |schema_type|
    define_method("#{schema_type}_schema") do
      Avro::Schema.parse("\"#{schema_type}\"")
    end
  end

  def int_array_schema
    Avro::Schema.parse('{"type":"array", "items":"int"}')
  end

  def long_array_schema
    Avro::Schema.parse('{"type":"array", "items":"long"}')
  end

  def string_array_schema
    Avro::Schema.parse('{"type":"array", "items":"string"}')
  end

  def int_map_schema
    Avro::Schema.parse('{"type":"map", "values":"int"}')
  end

  def long_map_schema
    Avro::Schema.parse('{"type":"map", "values":"long"}')
  end

  def string_map_schema
    Avro::Schema.parse('{"type":"map", "values":"string"}')
  end

  def enum1_ab_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B"]}')
  end

  def enum1_ab_default_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B"], "default":"A"}')
  end

  # Named "Enum2" but declares "Enum1" as an alias.
  def enum1_ab_aliased_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum2", "aliases":["Enum1"], "symbols":["A","B"]}')
  end

  def enum1_abc_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B","C"]}')
  end

  def enum1_bc_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["B","C"]}')
  end

  def enum2_ab_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum2", "symbols":["A","B"]}')
  end

  def fixed1_schema
    Avro::Schema.parse('{"type":"fixed", "name":"Fixed1", "size": 2}')
  end

  # Named "Fixed2" but declares "Fixed1" as an alias.
  def fixed1_aliased_schema
    Avro::Schema.parse('{"type":"fixed", "name":"Fixed2", "aliases":["Fixed1"], "size": 2}')
  end

  def fixed2_schema
    Avro::Schema.parse('{"type":"fixed", "name":"Fixed2", "size": 2}')
  end

  def fixed1_size3_schema
    Avro::Schema.parse('{"type":"fixed", "name":"Fixed1", "size": 3}')
  end

  def empty_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1"}')
  end

  # Named "Record2" but declares "Record1" as an alias.
  def empty_record1_aliased_schema
    Avro::Schema.parse('{"type":"record", "name":"Record2", "aliases":["Record1"]}')
  end

  def empty_record2_schema
    Avro::Schema.parse('{"type":"record", "name":"Record2"}')
  end

  # In the record helpers below, "dint" marks an int field with default 0.

  def a_int_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}]}')
  end

  def a_long_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"long"}]}')
  end

  def a_int_b_int_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int"}]}')
  end

  def a_dint_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}]}')
  end

  def a_int_b_dint_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int", "default":0}]}')
  end

  def a_dint_b_dint_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}, {"name":"b", "type":"int", "default":0}]}')
  end

  # Record "parent" whose "attribute" field is a required "child" record.
  def nested_record
    Avro::Schema.parse('{"type":"record","name":"parent","fields":[{"name":"attribute","type":{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}}]}')
  end

  # Record "parent" whose "attribute" field is a null/"child" union that
  # defaults to null.
  def nested_optional_record
    Avro::Schema.parse('{"type":"record","name":"parent","fields":[{"name":"attribute","type":["null",{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}],"default":null}]}')
  end

  # Self-referential record "List" with an int head.
  def int_list_record_schema
    Avro::Schema.parse(<<-SCHEMA)
      {
        "type":"record", "name":"List", "fields": [
          {"name": "head", "type": "int"},
          {"name": "tail", "type": "List"}
      ]}
    SCHEMA
  end

  # Self-referential record "List" with a long head.
  def long_list_record_schema
    Avro::Schema.parse(<<-SCHEMA)
      {
        "type":"record", "name":"List", "fields": [
          {"name": "head", "type": "long"},
          {"name": "tail", "type": "List"}
      ]}
    SCHEMA
  end

  def empty_union_schema
    union_schema
  end

  def null_union_schema
    union_schema(null_schema)
  end

  def int_union_schema
    union_schema(int_schema)
  end

  def long_union_schema
    union_schema(long_schema)
  end

  def string_union_schema
    union_schema(string_schema)
  end

  def int_string_union_schema
    union_schema(int_schema, string_schema)
  end

  def string_int_union_schema
    union_schema(string_schema, int_schema)
  end
end