avro 1.8.2 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,475 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
# Exercises Avro::SchemaCompatibility.can_read? against a catalogue of
# reader/writer schema pairs (primitives, arrays, maps, enums, unions and
# records), plus a handful of focused record-evolution and union-resolution
# scenarios.
#
# Note on argument order: the pair tables below are written as
# "reader, writer", whereas the can_read? helper takes (writer, reader).
class TestSchemaCompatibility < Test::Unit::TestCase

  # Every primitive type must be readable by an identically-typed reader.
  def test_primitive_schema_compatibility
    Avro::Schema::PRIMITIVE_TYPES.each do |schema_type|
      helper_name = "#{schema_type}_schema"
      # Two independent parses are compared on purpose (writer vs. reader).
      assert_true(
        can_read?(public_send(helper_name), public_send(helper_name)),
        "expecting #{schema_type} to read #{schema_type}"
      )
    end
  end

  # Each consecutive pair is (reader, writer); the reader is expected to be
  # able to read data written with the writer schema.
  def test_compatible_reader_writer_pairs
    [
      long_schema, int_schema,
      float_schema, int_schema,
      float_schema, long_schema,
      double_schema, long_schema,
      double_schema, int_schema,
      double_schema, float_schema,

      int_array_schema, int_array_schema,
      long_array_schema, int_array_schema,
      int_map_schema, int_map_schema,
      long_map_schema, int_map_schema,

      enum1_ab_schema, enum1_ab_schema,
      enum1_abc_schema, enum1_ab_schema,

      string_schema, bytes_schema,
      bytes_schema, string_schema,

      empty_union_schema, empty_union_schema,
      int_union_schema, int_union_schema,
      int_string_union_schema, string_int_union_schema,
      int_union_schema, empty_union_schema,
      long_union_schema, int_union_schema,

      int_union_schema, int_schema,
      int_schema, int_union_schema,

      empty_record1_schema, empty_record1_schema,
      empty_record1_schema, a_int_record1_schema,

      a_int_record1_schema, a_int_record1_schema,
      a_dint_record1_schema, a_int_record1_schema,
      a_dint_record1_schema, a_dint_record1_schema,
      a_int_record1_schema, a_dint_record1_schema,

      a_long_record1_schema, a_int_record1_schema,

      a_int_record1_schema, a_int_b_int_record1_schema,
      a_dint_record1_schema, a_int_b_int_record1_schema,

      a_int_b_dint_record1_schema, a_int_record1_schema,
      a_dint_b_dint_record1_schema, empty_record1_schema,
      a_dint_b_dint_record1_schema, a_int_record1_schema,
      a_int_b_int_record1_schema, a_dint_b_dint_record1_schema,

      int_list_record_schema, int_list_record_schema,
      long_list_record_schema, long_list_record_schema,
      long_list_record_schema, int_list_record_schema,

      null_schema, null_schema,

      nested_optional_record, nested_record
    ].each_slice(2) do |reader, writer|
      assert_true(can_read?(writer, reader), "expecting #{reader} to read #{writer}")
    end
  end

  # A reader union of [int] must not be able to read a writer union of
  # [int, string]. The same pair also appears in
  # test_incompatible_reader_writer_pairs.
  def test_broken
    assert_false(can_read?(int_string_union_schema, int_union_schema))
  end

  # Each consecutive pair is (reader, writer); the reader is expected NOT to
  # be able to read data written with the writer schema.
  def test_incompatible_reader_writer_pairs
    [
      null_schema, int_schema,
      null_schema, long_schema,

      boolean_schema, int_schema,

      int_schema, null_schema,
      int_schema, boolean_schema,
      int_schema, long_schema,
      int_schema, float_schema,
      int_schema, double_schema,

      long_schema, float_schema,
      long_schema, double_schema,

      float_schema, double_schema,

      string_schema, boolean_schema,
      string_schema, int_schema,

      bytes_schema, null_schema,
      bytes_schema, int_schema,

      int_array_schema, long_array_schema,
      int_map_schema, int_array_schema,
      int_array_schema, int_map_schema,
      int_map_schema, long_map_schema,

      enum1_ab_schema, enum1_abc_schema,
      enum1_bc_schema, enum1_abc_schema,

      enum1_ab_schema, enum2_ab_schema,
      int_schema, enum2_ab_schema,
      enum2_ab_schema, int_schema,

      int_union_schema, int_string_union_schema,
      string_union_schema, int_string_union_schema,

      empty_record2_schema, empty_record1_schema,
      a_int_record1_schema, empty_record1_schema,
      a_int_b_dint_record1_schema, empty_record1_schema,

      int_list_record_schema, long_list_record_schema,

      nested_record, nested_optional_record
    ].each_slice(2) do |reader, writer|
      assert_false(can_read?(writer, reader), "expecting #{reader} not to read #{writer}")
    end
  end

  # Baseline two-field record used as the "old" schema by the record
  # evolution tests below.
  def writer_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"oldfield2", "type":"string"}
      ]}
    SCHEMA
  end

  # Reader drops the second field: it can read the baseline, but the baseline
  # cannot read it.
  def test_missing_field
    reader_schema = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # Reader drops the first field: it can read the baseline, but the baseline
  # cannot read it.
  def test_missing_second_field
    reader_schema = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield2", "type":"string"}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # Identical field sets are readable in both directions.
  def test_all_fields
    reader_schema = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"oldfield2", "type":"string"}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_true(can_read?(reader_schema, writer_schema))
  end

  # Reader replaces oldfield2 with a new field that has a default: it can read
  # the baseline, but the baseline cannot read it.
  def test_new_field_with_default
    reader_schema = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"newfield1", "type":"int", "default":42}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # Reader replaces oldfield2 with a new field that has no default:
  # unreadable in both directions.
  def test_new_field
    reader_schema = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"newfield1", "type":"int"}
      ]}
    SCHEMA
    assert_false(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # An array-of-string writer is readable by an array reader but not by a map
  # reader.
  def test_array_writer_schema
    valid_reader = string_array_schema
    invalid_reader = string_map_schema

    assert_true(can_read?(string_array_schema, valid_reader))
    assert_false(can_read?(string_array_schema, invalid_reader))
  end

  # A string writer is readable by a string reader; an int writer is not.
  def test_primitive_writer_schema
    valid_reader = string_schema
    assert_true(can_read?(string_schema, valid_reader))
    assert_false(can_read?(int_schema, string_schema))
  end

  def test_union_reader_writer_subset_incompatiblity
    # reader union schema must contain all writer union branches
    union_writer = union_schema(int_schema, string_schema)
    union_reader = union_schema(string_schema)

    assert_false(can_read?(union_writer, union_reader))
    assert_true(can_read?(union_reader, union_writer))
  end

  # Same-named records whose single field differs in type (string vs. int)
  # are not readable.
  def test_incompatible_record_field
    string_field_schema = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [
        {"name":"field1", "type":"string"}
      ]}
    SCHEMA
    int_field_schema = Avro::Schema.parse <<-SCHEMA2
      {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [
        {"name":"field1", "type":"int"}
      ]}
    SCHEMA2
    assert_false(can_read?(string_field_schema, int_field_schema))
  end

  # A reader with symbols [A, B] cannot read a writer with [A, B, C], while
  # the reverse direction is readable.
  def test_enum_symbols
    enum_schema1 = Avro::Schema.parse <<-SCHEMA
      {"type":"enum", "name":"MyEnum", "symbols":["A","B"]}
    SCHEMA
    enum_schema2 = Avro::Schema.parse <<-SCHEMA
      {"type":"enum", "name":"MyEnum", "symbols":["A","B","C"]}
    SCHEMA
    assert_false(can_read?(enum_schema2, enum_schema1))
    assert_true(can_read?(enum_schema1, enum_schema2))
  end

  # Tests from lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java

  # Record "Point2D" with double fields x and y.
  def point_2d_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Point2D", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"}
      ]}
    SCHEMA
  end

  # Record "Point" in namespace "written" with double fields x and y; used as
  # the writer schema in the union resolution tests.
  def point_2d_fullname_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Point", "namespace":"written", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"}
      ]}
    SCHEMA
  end

  # Record "Point" with x, y and a z field that has no default.
  def point_3d_no_default_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Point", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"},
        {"name":"z", "type":"double"}
      ]}
    SCHEMA
  end

  # Record "Point3D" with x, y and a z field defaulting to 0.0.
  def point_3d_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Point3D", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"},
        {"name":"z", "type":"double", "default": 0.0}
      ]}
    SCHEMA
  end

  # Record "Point" (no namespace) with x, y and a z field defaulting to 0.0.
  def point_3d_match_name_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Point", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"},
        {"name":"z", "type":"double", "default": 0.0}
      ]}
    SCHEMA
  end

  def test_union_resolution_no_structure_match
    # short name match, but no structure match
    read_schema = union_schema(null_schema, point_3d_no_default_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_first_structure_match_2d
    # multiple structure matches with no name matches
    read_schema = union_schema(null_schema, point_3d_no_default_schema, point_2d_schema, point_3d_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_first_structure_match_3d
    # multiple structure matches with no name matches
    read_schema = union_schema(null_schema, point_3d_no_default_schema, point_3d_schema, point_2d_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_named_structure_match
    # multiple structure matches with a short name match
    read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_full_name_match
    # there is a full name match that should be chosen
    read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema, point_2d_fullname_schema)
    assert_true(can_read?(point_2d_fullname_schema, read_schema))
  end

  # Thin wrapper around Avro::SchemaCompatibility.can_read?.
  # Arguments are passed through in (writer, reader) order.
  def can_read?(writer, reader)
    Avro::SchemaCompatibility.can_read?(writer, reader)
  end

  # Builds a union schema from the given branch schemas by joining their
  # to_s representations inside a JSON array. With no arguments this parses
  # "[]", i.e. an empty union.
  def union_schema(*schemas)
    Avro::Schema.parse("[#{schemas.map(&:to_s).join(',')}]")
  end

  # Defines one "<type>_schema" helper per entry in
  # Avro::Schema::PRIMITIVE_TYPES (e.g. int_schema, string_schema), each
  # parsing the quoted type name.
  Avro::Schema::PRIMITIVE_TYPES.each do |schema_type|
    define_method("#{schema_type}_schema") do
      Avro::Schema.parse("\"#{schema_type}\"")
    end
  end

  # --- array and map fixtures ---

  def int_array_schema
    Avro::Schema.parse('{"type":"array", "items":"int"}')
  end

  def long_array_schema
    Avro::Schema.parse('{"type":"array", "items":"long"}')
  end

  def string_array_schema
    Avro::Schema.parse('{"type":"array", "items":"string"}')
  end

  def int_map_schema
    Avro::Schema.parse('{"type":"map", "values":"int"}')
  end

  def long_map_schema
    Avro::Schema.parse('{"type":"map", "values":"long"}')
  end

  def string_map_schema
    Avro::Schema.parse('{"type":"map", "values":"string"}')
  end

  # --- enum fixtures: "<name>_<symbols>_schema" ---

  def enum1_ab_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B"]}')
  end

  def enum1_abc_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B","C"]}')
  end

  def enum1_bc_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["B","C"]}')
  end

  def enum2_ab_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum2", "symbols":["A","B"]}')
  end

  # --- record fixtures ---
  # In the names below "int"/"long" is the field type and a "d" prefix
  # (as in "dint") marks a field declared with a default value.

  def empty_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1"}')
  end

  def empty_record2_schema
    Avro::Schema.parse('{"type":"record", "name":"Record2"}')
  end

  def a_int_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}]}')
  end

  def a_long_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"long"}]}')
  end

  def a_int_b_int_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int"}]}')
  end

  def a_dint_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}]}')
  end

  def a_int_b_dint_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int", "default":0}]}')
  end

  def a_dint_b_dint_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}, {"name":"b", "type":"int", "default":0}]}')
  end

  # Record "parent" whose "attribute" field is a required nested "child" record.
  def nested_record
    Avro::Schema.parse('{"type":"record","name":"parent","fields":[{"name":"attribute","type":{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}}]}')
  end

  # Record "parent" whose "attribute" field is a ["null", child] union
  # defaulting to null.
  def nested_optional_record
    Avro::Schema.parse('{"type":"record","name":"parent","fields":[{"name":"attribute","type":["null",{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}],"default":null}]}')
  end

  # Self-referencing "List" record with an int head.
  def int_list_record_schema
    Avro::Schema.parse <<-SCHEMA
      {
        "type":"record", "name":"List", "fields": [
          {"name": "head", "type": "int"},
          {"name": "tail", "type": "List"}
      ]}
    SCHEMA
  end

  # Self-referencing "List" record with a long head.
  def long_list_record_schema
    Avro::Schema.parse <<-SCHEMA
      {
        "type":"record", "name":"List", "fields": [
          {"name": "head", "type": "long"},
          {"name": "tail", "type": "List"}
      ]}
    SCHEMA
  end

  # --- union fixtures: branches are listed in name order ---

  def empty_union_schema
    union_schema
  end

  def null_union_schema
    union_schema(null_schema)
  end

  def int_union_schema
    union_schema(int_schema)
  end

  def long_union_schema
    union_schema(long_schema)
  end

  def string_union_schema
    union_schema(string_schema)
  end

  def int_string_union_schema
    union_schema(int_schema, string_schema)
  end

  def string_int_union_schema
    union_schema(string_schema, int_schema)
  end
end