avro 1.8.2 → 1.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,475 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_help'
18
+
19
# Tests for Avro::SchemaCompatibility.can_read?(writer, reader).
#
# The test methods come first; the schema-building helpers they rely on are
# defined after them. Helper naming convention used below:
#   * "<type>_schema"            - a primitive schema (generated for every
#                                  entry in Avro::Schema::PRIMITIVE_TYPES)
#   * "a_int" / "a_dint"         - record field "a" of type int, without /
#                                  with a default value
#   * "record1" / "record2"      - the record is named Record1 / Record2
class TestSchemaCompatibility < Test::Unit::TestCase

  # Every primitive schema must be readable by a freshly parsed copy of itself.
  def test_primitive_schema_compatibility
    Avro::Schema::PRIMITIVE_TYPES.each do |schema_type|
      writer = send("#{schema_type}_schema")
      reader = send("#{schema_type}_schema")
      assert_true(can_read?(writer, reader))
    end
  end

  # The table is a flat list consumed two entries at a time:
  # reader first, writer second. Note that can_read? takes the writer first.
  def test_compatible_reader_writer_pairs
    [
      long_schema, int_schema,
      float_schema, int_schema,
      float_schema, long_schema,
      double_schema, long_schema,
      double_schema, int_schema,
      double_schema, float_schema,

      int_array_schema, int_array_schema,
      long_array_schema, int_array_schema,
      int_map_schema, int_map_schema,
      long_map_schema, int_map_schema,

      enum1_ab_schema, enum1_ab_schema,
      enum1_abc_schema, enum1_ab_schema,

      string_schema, bytes_schema,
      bytes_schema, string_schema,

      empty_union_schema, empty_union_schema,
      int_union_schema, int_union_schema,
      int_string_union_schema, string_int_union_schema,
      int_union_schema, empty_union_schema,
      long_union_schema, int_union_schema,

      int_union_schema, int_schema,
      int_schema, int_union_schema,

      empty_record1_schema, empty_record1_schema,
      empty_record1_schema, a_int_record1_schema,

      a_int_record1_schema, a_int_record1_schema,
      a_dint_record1_schema, a_int_record1_schema,
      a_dint_record1_schema, a_dint_record1_schema,
      a_int_record1_schema, a_dint_record1_schema,

      a_long_record1_schema, a_int_record1_schema,

      a_int_record1_schema, a_int_b_int_record1_schema,
      a_dint_record1_schema, a_int_b_int_record1_schema,

      a_int_b_dint_record1_schema, a_int_record1_schema,
      a_dint_b_dint_record1_schema, empty_record1_schema,
      a_dint_b_dint_record1_schema, a_int_record1_schema,
      a_int_b_int_record1_schema, a_dint_b_dint_record1_schema,

      int_list_record_schema, int_list_record_schema,
      long_list_record_schema, long_list_record_schema,
      long_list_record_schema, int_list_record_schema,

      null_schema, null_schema,

      nested_optional_record, nested_record
    ].each_slice(2) do |reader, writer|
      assert_true(can_read?(writer, reader), "expecting #{reader} to read #{writer}")
    end
  end

  # A single-branch int union reader must not read an int/string union writer.
  # (The same pair is also listed in test_incompatible_reader_writer_pairs.)
  def test_broken
    assert_false(can_read?(int_string_union_schema, int_union_schema))
  end

  # Same table layout as test_compatible_reader_writer_pairs:
  # reader first, writer second.
  def test_incompatible_reader_writer_pairs
    [
      null_schema, int_schema,
      null_schema, long_schema,

      boolean_schema, int_schema,

      int_schema, null_schema,
      int_schema, boolean_schema,
      int_schema, long_schema,
      int_schema, float_schema,
      int_schema, double_schema,

      long_schema, float_schema,
      long_schema, double_schema,

      float_schema, double_schema,

      string_schema, boolean_schema,
      string_schema, int_schema,

      bytes_schema, null_schema,
      bytes_schema, int_schema,

      int_array_schema, long_array_schema,
      int_map_schema, int_array_schema,
      int_array_schema, int_map_schema,
      int_map_schema, long_map_schema,

      enum1_ab_schema, enum1_abc_schema,
      enum1_bc_schema, enum1_abc_schema,

      enum1_ab_schema, enum2_ab_schema,
      int_schema, enum2_ab_schema,
      enum2_ab_schema, int_schema,

      int_union_schema, int_string_union_schema,
      string_union_schema, int_string_union_schema,

      empty_record2_schema, empty_record1_schema,
      a_int_record1_schema, empty_record1_schema,
      a_int_b_dint_record1_schema, empty_record1_schema,

      int_list_record_schema, long_list_record_schema,

      nested_record, nested_optional_record
    ].each_slice(2) do |reader, writer|
      assert_false(can_read?(writer, reader), "expecting #{reader} not to read #{writer}")
    end
  end

  # Two-field record used as the writer side by the field-evolution tests below.
  def writer_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"oldfield2", "type":"string"}
      ]}
    SCHEMA
  end

  # Reader drops the writer's second field.
  def test_missing_field
    reader_schema = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # Reader drops the writer's first field.
  def test_missing_second_field
    reader_schema = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield2", "type":"string"}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # Reader and writer declare the same fields; compatible in both directions.
  def test_all_fields
    reader_schema = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"oldfield2", "type":"string"}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_true(can_read?(reader_schema, writer_schema))
  end

  # Reader adds a field that carries a default value.
  def test_new_field_with_default
    reader_schema = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"newfield1", "type":"int", "default":42}
      ]}
    SCHEMA
    assert_true(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  # Reader adds a field without a default; incompatible in both directions.
  def test_new_field
    reader_schema = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Record", "fields":[
        {"name":"oldfield1", "type":"int"},
        {"name":"newfield1", "type":"int"}
      ]}
    SCHEMA
    assert_false(can_read?(writer_schema, reader_schema))
    assert_false(can_read?(reader_schema, writer_schema))
  end

  def test_array_writer_schema
    valid_reader = string_array_schema
    invalid_reader = string_map_schema

    assert_true(can_read?(string_array_schema, valid_reader))
    assert_false(can_read?(string_array_schema, invalid_reader))
  end

  def test_primitive_writer_schema
    valid_reader = string_schema
    assert_true(can_read?(string_schema, valid_reader))
    assert_false(can_read?(int_schema, string_schema))
  end

  def test_union_reader_writer_subset_incompatiblity
    # reader union schema must contain all writer union branches
    union_writer = union_schema(int_schema, string_schema)
    union_reader = union_schema(string_schema)

    assert_false(can_read?(union_writer, union_reader))
    assert_true(can_read?(union_reader, union_writer))
  end

  # Same record name and field name, but the field types differ.
  def test_incompatible_record_field
    # Locals are deliberately not called string_schema / int_schema so they do
    # not shadow the primitive-schema helper methods of the same name.
    string_field_record = Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [
        {"name":"field1", "type":"string"}
      ]}
    SCHEMA
    int_field_record = Avro::Schema.parse <<-SCHEMA2
      {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [
        {"name":"field1", "type":"int"}
      ]}
    SCHEMA2
    assert_false(can_read?(string_field_record, int_field_record))
  end

  def test_enum_symbols
    enum_schema1 = Avro::Schema.parse <<-SCHEMA
      {"type":"enum", "name":"MyEnum", "symbols":["A","B"]}
    SCHEMA
    enum_schema2 = Avro::Schema.parse <<-SCHEMA
      {"type":"enum", "name":"MyEnum", "symbols":["A","B","C"]}
    SCHEMA
    assert_false(can_read?(enum_schema2, enum_schema1))
    assert_true(can_read?(enum_schema1, enum_schema2))
  end

  # Tests from lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java

  def point_2d_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Point2D", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"}
      ]}
    SCHEMA
  end

  def point_2d_fullname_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Point", "namespace":"written", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"}
      ]}
    SCHEMA
  end

  def point_3d_no_default_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Point", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"},
        {"name":"z", "type":"double"}
      ]}
    SCHEMA
  end

  def point_3d_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Point3D", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"},
        {"name":"z", "type":"double", "default": 0.0}
      ]}
    SCHEMA
  end

  def point_3d_match_name_schema
    Avro::Schema.parse <<-SCHEMA
      {"type":"record", "name":"Point", "fields":[
        {"name":"x", "type":"double"},
        {"name":"y", "type":"double"},
        {"name":"z", "type":"double", "default": 0.0}
      ]}
    SCHEMA
  end

  def test_union_resolution_no_structure_match
    # short name match, but no structure match
    read_schema = union_schema(null_schema, point_3d_no_default_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_first_structure_match_2d
    # multiple structure matches with no name matches
    read_schema = union_schema(null_schema, point_3d_no_default_schema, point_2d_schema, point_3d_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_first_structure_match_3d
    # multiple structure matches with no name matches
    read_schema = union_schema(null_schema, point_3d_no_default_schema, point_3d_schema, point_2d_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_named_structure_match
    # multiple structure matches with a short name match
    read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema)
    assert_false(can_read?(point_2d_fullname_schema, read_schema))
  end

  def test_union_resolution_full_name_match
    # there is a full name match that should be chosen
    read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema, point_2d_fullname_schema)
    assert_true(can_read?(point_2d_fullname_schema, read_schema))
  end

  # Thin wrapper so the tests read naturally. Argument order is writer, reader.
  def can_read?(writer, reader)
    Avro::SchemaCompatibility.can_read?(writer, reader)
  end

  # Builds a union schema by joining the string form of each given schema
  # inside a JSON array and parsing the result. With no arguments the parsed
  # text is "[]".
  def union_schema(*schemas)
    # A splat parameter is always an Array, so no nil-guard is needed here.
    Avro::Schema.parse("[#{schemas.map(&:to_s).join(',')}]")
  end

  # Defines one "<type>_schema" helper per primitive type listed in
  # Avro::Schema::PRIMITIVE_TYPES; each call parses a fresh schema.
  Avro::Schema::PRIMITIVE_TYPES.each do |schema_type|
    define_method("#{schema_type}_schema") do
      Avro::Schema.parse("\"#{schema_type}\"")
    end
  end

  # --- array and map schemas ---

  def int_array_schema
    Avro::Schema.parse('{"type":"array", "items":"int"}')
  end

  def long_array_schema
    Avro::Schema.parse('{"type":"array", "items":"long"}')
  end

  def string_array_schema
    Avro::Schema.parse('{"type":"array", "items":"string"}')
  end

  def int_map_schema
    Avro::Schema.parse('{"type":"map", "values":"int"}')
  end

  def long_map_schema
    Avro::Schema.parse('{"type":"map", "values":"long"}')
  end

  def string_map_schema
    Avro::Schema.parse('{"type":"map", "values":"string"}')
  end

  # --- enum schemas ---

  def enum1_ab_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B"]}')
  end

  def enum1_abc_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B","C"]}')
  end

  def enum1_bc_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["B","C"]}')
  end

  def enum2_ab_schema
    Avro::Schema.parse('{"type":"enum", "name":"Enum2", "symbols":["A","B"]}')
  end

  # --- record schemas ---

  def empty_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1"}')
  end

  def empty_record2_schema
    Avro::Schema.parse('{"type":"record", "name":"Record2"}')
  end

  def a_int_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}]}')
  end

  def a_long_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"long"}]}')
  end

  def a_int_b_int_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int"}]}')
  end

  def a_dint_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}]}')
  end

  def a_int_b_dint_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int", "default":0}]}')
  end

  def a_dint_b_dint_record1_schema
    Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}, {"name":"b", "type":"int", "default":0}]}')
  end

  # Parent record whose "attribute" field is a required child record.
  def nested_record
    Avro::Schema.parse('{"type":"record","name":"parent","fields":[{"name":"attribute","type":{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}}]}')
  end

  # Parent record whose "attribute" field is a null/child union defaulting to null.
  def nested_optional_record
    Avro::Schema.parse('{"type":"record","name":"parent","fields":[{"name":"attribute","type":["null",{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}],"default":null}]}')
  end

  # Self-referencing record: "tail" refers back to the enclosing List type.
  def int_list_record_schema
    Avro::Schema.parse <<-SCHEMA
      {
        "type":"record", "name":"List", "fields": [
          {"name": "head", "type": "int"},
          {"name": "tail", "type": "List"}
      ]}
    SCHEMA
  end

  # Same shape as int_list_record_schema, with a long "head".
  def long_list_record_schema
    Avro::Schema.parse <<-SCHEMA
      {
        "type":"record", "name":"List", "fields": [
          {"name": "head", "type": "long"},
          {"name": "tail", "type": "List"}
      ]}
    SCHEMA
  end

  # --- union schemas ---

  def empty_union_schema
    union_schema
  end

  def null_union_schema
    union_schema(null_schema)
  end

  def int_union_schema
    union_schema(int_schema)
  end

  def long_union_schema
    union_schema(long_schema)
  end

  def string_union_schema
    union_schema(string_schema)
  end

  def int_string_union_schema
    union_schema(int_schema, string_schema)
  end

  def string_int_union_schema
    union_schema(string_schema, int_schema)
  end
end