tros 1.7.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ {"namespace":"org.apache.avro.mapred.tether",
2
+ "protocol": "OutputProtocol",
3
+ "doc": "Transmit outputs from a map or reduce task to parent.",
4
+
5
+ "messages": {
6
+
7
+ "configure": {
8
+ "doc": "Configure task. Sent before any other message.",
9
+ "request": [
10
+ {"name": "port", "type": "int",
11
+ "doc": "The port to transmit inputs to this task on."}
12
+ ],
13
+ "response": "null",
14
+ "one-way": true
15
+ },
16
+
17
+ "output": {
18
+ "doc": "Send an output datum.",
19
+ "request": [
20
+ {"name": "datum", "type": "bytes",
21
+ "doc": "A binary-encoded instance of the declared schema."}
22
+ ],
23
+ "response": "null",
24
+ "one-way": true
25
+ },
26
+
27
+ "outputPartitioned": {
28
+ "doc": "Send map output datum explicitly naming its partition.",
29
+ "request": [
30
+ {"name": "partition", "type": "int",
31
+ "doc": "The map output partition for this datum."},
32
+ {"name": "datum", "type": "bytes",
33
+ "doc": "A binary-encoded instance of the declared schema."}
34
+ ],
35
+ "response": "null",
36
+ "one-way": true
37
+ },
38
+
39
+ "status": {
40
+ "doc": "Update the task's status message. Also acts as keepalive.",
41
+ "request": [
42
+ {"name": "message", "type": "string",
43
+ "doc": "The new status message for the task."}
44
+ ],
45
+ "response": "null",
46
+ "one-way": true
47
+ },
48
+
49
+ "count": {
50
+ "doc": "Increment a task/job counter.",
51
+ "request": [
52
+ {"name": "group", "type": "string",
53
+ "doc": "The name of the counter group."},
54
+ {"name": "name", "type": "string",
55
+ "doc": "The name of the counter to increment."},
56
+ {"name": "amount", "type": "long",
57
+ "doc": "The amount to increment the counter."}
58
+ ],
59
+ "response": "null",
60
+ "one-way": true
61
+ },
62
+
63
+ "fail": {
64
+ "doc": "Called by a failing task to abort.",
65
+ "request": [
66
+ {"name": "message", "type": "string",
67
+ "doc": "The reason for failure."}
68
+ ],
69
+ "response": "null",
70
+ "one-way": true
71
+ },
72
+
73
+ "complete": {
74
+ "doc": "Called when a task's output has completed without error.",
75
+ "request": [],
76
+ "response": "null",
77
+ "one-way": true
78
+ }
79
+
80
+ }
81
+
82
+ }
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env ruby
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
# Produces pseudo-random datums shaped after a schema object, for use in
# serialization round-trip tests.
#
# A schema is any object answering +type_sym+ plus the accessor that goes
# with that type (+items+, +values+, +fields+, +schemas+, +symbols+ or
# +size+).
#
# Every instance draws from its own Random generator. Two instances built
# with the same seed therefore yield the same sequence of datums, even when
# their calls are interleaved or other code calls Kernel#rand in between.
# Constructing an instance no longer reseeds the process-wide generator.
class RandomData
  # Characters that random :string values (and map keys) are drawn from.
  CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'.freeze
  # Characters that random :bytes and :fixed values are drawn from.
  BYTEPOOL = '12345abcd'.freeze

  # schm:: schema describing the datums to generate.
  # seed:: optional integer seed; when given, the generated sequence is
  #        reproducible. When nil, the generator seeds itself.
  def initialize(schm, seed=nil)
    @seed = seed
    @schm = schm
    @rng = seed ? Random.new(seed) : Random.new
  end

  # Returns the next random datum for the schema given to the constructor.
  def next
    nextdata(@schm)
  end

  # Returns a random datum for +schm+.
  #
  # d:: current nesting depth. Arrays and maps get shorter as +d+ grows
  #     (and are empty once it reaches 6), which bounds the recursion for
  #     schemas that recurse through an array or map.
  #
  # Returns nil for :null, for an enum without symbols, and for a +type_sym+
  # that is not handled here.
  def nextdata(schm, d=0)
    case schm.type_sym
    when :boolean
      @rng.rand > 0.5
    when :string
      randstr
    when :int
      # Inclusive range, so the maximum value can be produced as well.
      @rng.rand(Tros::Schema::INT_MIN_VALUE..Tros::Schema::INT_MAX_VALUE)
    when :long
      @rng.rand(Tros::Schema::LONG_MIN_VALUE..Tros::Schema::LONG_MAX_VALUE)
    when :float
      # A whole number in [-1024.0, 1024.0].
      (-1024 + 2048 * @rng.rand).round.to_f
    when :double
      Tros::Schema::LONG_MIN_VALUE +
        (Tros::Schema::LONG_MAX_VALUE - Tros::Schema::LONG_MIN_VALUE) * @rng.rand
    when :bytes
      randstr(BYTEPOOL)
    when :null
      nil
    when :array
      Array.new(container_length(d)) { nextdata(schm.items, d + 1) }
    when :map
      map = {}
      # Keys may repeat, so the map can hold fewer entries than were drawn.
      container_length(d).times do
        map[randstr] = nextdata(schm.values, d + 1)
      end
      map
    when :record, :error
      schm.fields.each_with_object({}) do |field, record|
        record[field.name] = nextdata(field.type, d + 1)
      end
    when :union
      # A union does not add a nesting level, so depth is passed unchanged.
      nextdata(schm.schemas.sample(random: @rng), d)
    when :enum
      # Array#sample returns nil for an empty symbol list.
      schm.symbols.sample(random: @rng)
    when :fixed
      random_chars(BYTEPOOL, schm.size)
    end
  end

  # Returns a random string of 0..length characters drawn from +chars+.
  def randstr(chars=CHARPOOL, length=20)
    random_chars(chars, @rng.rand(length + 1))
  end

  private

  # Number of elements for an array or map at nesting depth +d+:
  # a random value in 2..6 minus the depth, never negative.
  def container_length(d)
    [@rng.rand(5) + 2 - d, 0].max
  end

  # Builds a string of exactly +count+ characters picked at random from +pool+.
  def random_chars(pool, count)
    out = ''.dup
    count.times { out << pool[@rng.rand(pool.size)] }
    out
  end
end
@@ -0,0 +1,419 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_helper'
18
+
19
# Tests for schema parsing round-trips, reader defaults, binary int/long
# encoding, value skipping, numeric schema promotion and data-file I/O.
class IOTest < Minitest::Test
  # Scratch file written and read back by check_datafile.
  DATAFILE = File.join(TMP_DIR, 'io_test.avro')

  def test_null
    check_default('"null"', "null", nil)
  end

  def test_boolean
    check_default('"boolean"', "true", true)
    check_default('"boolean"', "false", false)
  end

  def test_string
    check_default('"string"', '"foo"', "foo")
  end

  def test_bytes
    check_default('"bytes"', '"foo"', "foo")
  end

  def test_int
    check_default('"int"', "5", 5)
  end

  def test_long
    check_default('"long"', "9", 9)
  end

  def test_float
    check_default('"float"', "1.2", 1.2)
  end

  def test_double
    check_default('"double"', "1.2", 1.2)
  end

  def test_array
    array_schema = '{"type": "array", "items": "long"}'
    check_default(array_schema, "[1]", [1])
  end

  def test_map
    map_schema = '{"type": "map", "values": "long"}'
    check_default(map_schema, '{"a": 1}', {"a" => 1})
  end

  # NOTE: the schema heredocs below are only ever fed to a JSON parser, so
  # their indentation is purely cosmetic.
  def test_record
    record_schema = <<-EOS
      {"type": "record",
       "name": "Test",
       "fields": [{"name": "f",
                   "type": "long"}]}
    EOS
    check_default(record_schema, '{"f": 11}', {"f" => 11})
  end

  def test_error
    error_schema = <<-EOS
      {"type": "error",
       "name": "TestError",
       "fields": [{"name": "message",
                   "type": "string"}]}
    EOS
    check_default(error_schema, '{"message": "boom"}', {"message" => "boom"})
  end

  def test_enum
    enum_schema = '{"type": "enum", "name": "Test","symbols": ["A", "B"]}'
    check_default(enum_schema, '"B"', "B")
  end

  def test_recursive
    recursive_schema = <<-EOS
      {
        "type": "record",
        "name": "Node",
        "fields": [
          { "name": "label", "type": "string"},
          { "name": "children", "type": { "type": "array", "items": "Node"} }
        ]
      }
    EOS
    check(recursive_schema)
  end

  def test_union
    union_schema = <<-EOS
      [
        "string",
        "null",
        "long",
        {
          "type": "record",
          "name": "Cons",
          "fields": [
            {"name": "car", "type": "string"},
            {"name": "cdr", "type": "string"}
          ]
        }
      ]
    EOS
    check(union_schema)
    check_default('["double", "long"]', "1.1", 1.1)
  end

  def test_lisp
    lisp_schema = <<-EOS
      {
        "type": "record",
        "name": "Lisp",
        "fields": [
          {
            "name": "value",
            "type": [
              "null",
              "string",
              {
                "type": "record",
                "name": "Cons",
                "fields": [
                  { "name": "car", "type": "Lisp" },
                  { "name": "cdr", "type": "Lisp" }
                ]
              }
            ]
          }
        ]
      }
    EOS
    check(lisp_schema)
  end

  def test_fixed
    fixed_schema = '{"type": "fixed", "name": "Test", "size": 1}'
    check_default(fixed_schema, '"a"', "a")
  end

  # Parsing an enum that lists the same symbol twice must be rejected.
  def test_enum_with_duplicate
    str = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}'
    assert_raises(Tros::SchemaParseError) do
      Tros::Schema.parse(str)
    end
  end

  # Pairs of [integer value, expected encoded bytes as space-separated hex].
  BINARY_INT_ENCODINGS = [
    [0, '00'],
    [-1, '01'],
    [1, '02'],
    [-2, '03'],
    [2, '04'],
    [-64, '7f'],
    [64, '80 01'],
    [8192, '80 80 01'],
    [-8193, '81 80 01'],
  ]

  # Reads one variable-length encoded integer from +reader+ and returns its
  # bytes as space-separated hex pairs. Bytes are consumed up to and
  # including the first one whose high bit (0x80) is clear.
  def tros_hexlify(reader)
    bytes = []
    loop do
      current_byte = reader.read(1)
      bytes << hexlify(current_byte)
      break if (current_byte.unpack('C').first & 0x80).zero?
    end
    bytes.join ' '
  end

  # Hex-encodes +msg+. Returns the one-element array produced by
  # String#unpack; tros_hexlify relies on Array#join flattening it.
  def hexlify(msg)
    msg.unpack("H*")
  end

  def test_binary_int_encoding
    assert_binary_encodings('"int"')
  end

  def test_binary_long_encoding
    assert_binary_encodings('"long"')
  end

  def test_skip_long
    assert_skips_encoded_value('"long"') { |decoder| decoder.skip_long }
  end

  def test_skip_int
    assert_skips_encoded_value('"int"') { |decoder| decoder.skip_int }
  end

  # Writes a union value followed by a known int, skips the first through
  # DatumReader#skip_data and checks that the second is read back intact.
  def test_skip_union
    ["hello", -1, 32, nil].each do |value_to_skip|
      value_to_read = 6253

      schema = Tros::Schema.parse('["int", "string", "null"]')
      writer = StringIO.new
      encoder = Tros::IO::BinaryEncoder.new(writer)
      dw = Tros::IO::DatumWriter.new(schema)
      dw.write(value_to_skip, encoder)
      dw.write(value_to_read, encoder)

      reader = StringIO.new(writer.string)
      decoder = Tros::IO::BinaryDecoder.new(reader)
      dr = Tros::IO::DatumReader.new(schema)
      dr.skip_data(schema, decoder)
      read_value = dr.read(decoder)

      assert_equal value_to_read, read_value
    end
  end

  # Writes 219 with each numeric schema and reads it back with every schema
  # that follows it in the int -> long -> float -> double list. Each pair is
  # asserted on its own so a failure names the promotion that broke.
  def test_schema_promotion
    promotable_schemas = ['"int"', '"long"', '"float"', '"double"']
    datum_to_write = 219
    promotable_schemas.each_with_index do |ws, i|
      writers_schema = Tros::Schema.parse(ws)
      promotable_schemas[(i + 1)..-1].each do |rs|
        readers_schema = Tros::Schema.parse(rs)
        writer, = write_datum(datum_to_write, writers_schema)
        datum_read = read_datum(writer, writers_schema, readers_schema)
        assert_equal datum_to_write, datum_read, "promoting #{ws} to #{rs}"
      end
    end
  end

  private

  # Runs the generic checks on +schema_json+, then verifies default handling:
  # data written with a record schema that has no fields (the input is
  # empty) is read with a record schema whose only field "f" has type
  # +schema_json+ and default +default_json+; the resulting record's "f"
  # must equal +default_value+.
  def check_default(schema_json, default_json, default_value)
    check(schema_json)
    actual_schema = '{"type": "record", "name": "Foo", "fields": []}'
    actual = Tros::Schema.parse(actual_schema)

    expected_schema = <<-EOS
      {
        "type": "record",
        "name": "Foo",
        "fields": [
          {"name": "f", "type": #{schema_json},
           "default": #{default_json}}
        ]
      }
    EOS
    expected = Tros::Schema.parse(expected_schema)

    reader = Tros::IO::DatumReader.new(actual, expected)
    record = reader.read(Tros::IO::BinaryDecoder.new(StringIO.new))
    assert_equal default_value, record["f"]
  end

  # Generic schema checks: JSON round-trip, equality, hashing, random-data
  # serialization and data-file I/O.
  def check(str)
    # parse schema, then convert back to string
    schema = Tros::Schema.parse(str)

    parsed_string = schema.to_s

    # test that the round-trip didn't mess up anything
    # NB: I don't think we should do this. Why enforce ordering?
    assert_equal JSON.parse("[#{str}]").first.to_json, parsed_string

    # two parses of the same text must compare equal
    assert_equal schema, Tros::Schema.parse(str)

    # test hashcode doesn't generate infinite recursion
    schema.hash

    # test serialization of random data
    randomdata = RandomData.new(schema)
    9.times { check_serialization(schema, randomdata) }

    # test writing of data to file
    check_datafile(schema)
  end

  # Takes one datum from +randomdata+, checks that it validates against
  # +schm+, and that it survives an encode/decode round-trip unchanged.
  def check_serialization(schm, randomdata)
    datum = randomdata.next
    assert validate(schm, datum)
    w = Tros::IO::DatumWriter.new(schm)
    writer = StringIO.new("", "w")
    w.write(datum, Tros::IO::BinaryEncoder.new(writer))
    r = datum_reader(schm)
    reader = StringIO.new(writer.string)
    ob = r.read(Tros::IO::BinaryDecoder.new(reader))
    assert_equal(datum, ob) # FIXME check on assertdata conditional
  end

  # Writes +count+ seeded random datums to DATAFILE, then reads the file
  # back and compares every record against a generator rebuilt from the same
  # seed. Both file handles are closed even when an assertion or the library
  # raises, and the file is read in binary mode like it was written.
  def check_datafile(schm)
    seed = 0
    count = 10

    random_data = RandomData.new(schm, seed)
    f = File.open(DATAFILE, 'wb')
    begin
      dw = Tros::DataFile::Writer.new(f, datum_writer(schm), schm)
      count.times { dw << random_data.next }
      dw.close
    ensure
      f.close unless f.closed?
    end

    random_data = RandomData.new(schm, seed)
    read_count = 0
    f = File.open(DATAFILE, 'rb')
    begin
      dr = Tros::DataFile::Reader.new(f, datum_reader(schm))
      dr.each do |data|
        # FIXME assertdata conditional
        assert_equal(random_data.next, data)
        read_count += 1
      end
      dr.close
    ensure
      f.close unless f.closed?
    end

    # Counting records (rather than using the last index) makes an empty
    # file fail this assertion instead of raising on nil.
    assert_equal count, read_count
  end

  def validate(schm, datum)
    Tros::Schema.validate(schm, datum)
  end

  def datum_writer(schm)
    Tros::IO::DatumWriter.new(schm)
  end

  def datum_reader(schm)
    Tros::IO::DatumReader.new(schm)
  end

  # Encodes +datum+ with +writers_schema+ into a fresh StringIO.
  # Returns [buffer, encoder, datum_writer].
  def write_datum(datum, writers_schema)
    writer = StringIO.new
    encoder = Tros::IO::BinaryEncoder.new(writer)
    dw = Tros::IO::DatumWriter.new(writers_schema)
    dw.write(datum, encoder)
    [writer, encoder, dw]
  end

  # Decodes one datum from the contents of +buffer+ (a StringIO), using
  # +writers_schema+ and, when given, +readers_schema+.
  def read_datum(buffer, writers_schema, readers_schema=nil)
    reader = StringIO.new(buffer.string)
    decoder = Tros::IO::BinaryDecoder.new(reader)
    dr = Tros::IO::DatumReader.new(writers_schema, readers_schema)
    dr.read(decoder)
  end

  # Encodes every value of BINARY_INT_ENCODINGS with the primitive schema
  # +schema_json+ and compares the produced bytes with the expected hex.
  def assert_binary_encodings(schema_json)
    BINARY_INT_ENCODINGS.each do |value, hex_encoding|
      # write datum in binary to string buffer
      buffer, = write_datum(value, Tros::Schema.parse(schema_json))

      # read it out of the buffer and hexlify it
      buffer.seek(0)
      hex_val = tros_hexlify(buffer)

      assert_equal hex_encoding, hex_val
    end
  end

  # For every value of BINARY_INT_ENCODINGS: writes that value followed by a
  # known one using +schema_json+, yields the decoder so the caller can skip
  # the first value, then checks that the second one is read back intact.
  def assert_skips_encoded_value(schema_json)
    BINARY_INT_ENCODINGS.each do |value_to_skip, _hex_encoding|
      value_to_read = 6253

      # write some data in binary to string buffer
      writer = StringIO.new
      encoder = Tros::IO::BinaryEncoder.new(writer)
      dw = Tros::IO::DatumWriter.new(Tros::Schema.parse(schema_json))
      dw.write(value_to_skip, encoder)
      dw.write(value_to_read, encoder)

      # skip the value
      reader = StringIO.new(writer.string)
      decoder = Tros::IO::BinaryDecoder.new(reader)
      yield decoder

      # read data from string buffer
      dr = Tros::IO::DatumReader.new(Tros::Schema.parse(schema_json))
      read_value = dr.read(decoder)

      # check it
      assert_equal value_to_read, read_value
    end
  end
end