tros 1.7.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,82 @@
1
+ {"namespace":"org.apache.avro.mapred.tether",
2
+ "protocol": "OutputProtocol",
3
+ "doc": "Transmit outputs from a map or reduce task to parent.",
4
+
5
+ "messages": {
6
+
7
+ "configure": {
8
+ "doc": "Configure task. Sent before any other message.",
9
+ "request": [
10
+ {"name": "port", "type": "int",
11
+ "doc": "The port to transmit inputs to this task on."}
12
+ ],
13
+ "response": "null",
14
+ "one-way": true
15
+ },
16
+
17
+ "output": {
18
+ "doc": "Send an output datum.",
19
+ "request": [
20
+ {"name": "datum", "type": "bytes",
21
+ "doc": "A binary-encoded instance of the declared schema."}
22
+ ],
23
+ "response": "null",
24
+ "one-way": true
25
+ },
26
+
27
+ "outputPartitioned": {
28
+ "doc": "Send map output datum explicitly naming its partition.",
29
+ "request": [
30
+ {"name": "partition", "type": "int",
31
+ "doc": "The map output partition for this datum."},
32
+ {"name": "datum", "type": "bytes",
33
+ "doc": "A binary-encoded instance of the declared schema."}
34
+ ],
35
+ "response": "null",
36
+ "one-way": true
37
+ },
38
+
39
+ "status": {
40
+ "doc": "Update the task's status message. Also acts as keepalive.",
41
+ "request": [
42
+ {"name": "message", "type": "string",
43
+ "doc": "The new status message for the task."}
44
+ ],
45
+ "response": "null",
46
+ "one-way": true
47
+ },
48
+
49
+ "count": {
50
+ "doc": "Increment a task/job counter.",
51
+ "request": [
52
+ {"name": "group", "type": "string",
53
+ "doc": "The name of the counter group."},
54
+ {"name": "name", "type": "string",
55
+ "doc": "The name of the counter to increment."},
56
+ {"name": "amount", "type": "long",
57
+ "doc": "The amount to increment the counter."}
58
+ ],
59
+ "response": "null",
60
+ "one-way": true
61
+ },
62
+
63
+ "fail": {
64
+ "doc": "Called by a failing task to abort.",
65
+ "request": [
66
+ {"name": "message", "type": "string",
67
+ "doc": "The reason for failure."}
68
+ ],
69
+ "response": "null",
70
+ "one-way": true
71
+ },
72
+
73
+ "complete": {
74
+ "doc": "Called when a task's output has completed without error.",
75
+ "request": [],
76
+ "response": "null",
77
+ "one-way": true
78
+ }
79
+
80
+ }
81
+
82
+ }
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env ruby
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
# Generates pseudo-random datums that conform to a schema object.
#
# The schema only needs to respond to +type_sym+ plus the accessor that
# matches its type (+items+, +values+, +fields+, +schemas+, +symbols+ or
# +size+), which is all that #nextdata reads from it.
#
# Each instance owns its own random number generator, so constructing or
# using a RandomData never reseeds or advances the process-wide generator,
# and two instances built with the same seed yield the same sequence even
# when their calls are interleaved.
class RandomData
  # Characters used for generated :string values and map keys.
  CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'.freeze
  # Characters used for generated :bytes and :fixed values.
  BYTEPOOL = '12345abcd'.freeze

  # schm:: schema describing the datums to generate.
  # seed:: optional seed; when given, the generated sequence is reproducible.
  def initialize(schm, seed=nil)
    @rng = seed ? Random.new(seed) : Random.new
    @seed = seed
    @schm = schm
  end

  # Returns the next random datum for the schema given at construction.
  def next
    nextdata(@schm)
  end

  # Returns a random datum for +schm+.
  #
  # schm:: schema (or sub-schema) to generate a value for.
  # d::    current nesting depth. Container lengths shrink as +d+ grows,
  #        which is what makes generation for recursive schemas terminate.
  #
  # Returns nil for :null, for an enum without symbols, and for any
  # +type_sym+ that is not handled below.
  def nextdata(schm, d=0)
    case schm.type_sym
    when :boolean
      @rng.rand > 0.5
    when :string
      randstr
    when :int
      # Inclusive range so that both bounds can actually be generated.
      @rng.rand(Tros::Schema::INT_MIN_VALUE..Tros::Schema::INT_MAX_VALUE)
    when :long
      @rng.rand(Tros::Schema::LONG_MIN_VALUE..Tros::Schema::LONG_MAX_VALUE)
    when :float
      # Whole-valued floats in [-1024, 1024].
      (-1024 + 2048 * @rng.rand).round.to_f
    when :double
      Tros::Schema::LONG_MIN_VALUE +
        (Tros::Schema::LONG_MAX_VALUE - Tros::Schema::LONG_MIN_VALUE) * @rng.rand
    when :bytes
      randstr(BYTEPOOL)
    when :null
      nil
    when :array
      Array.new(container_length(d)) { nextdata(schm.items, d + 1) }
    when :map
      map = {}
      # Keys are random strings; a repeated key simply overwrites, so the
      # map may end up smaller than the drawn length.
      container_length(d).times { map[randstr] = nextdata(schm.values, d + 1) }
      map
    when :record, :error
      schm.fields.each_with_object({}) do |field, record|
        record[field.name] = nextdata(field.type, d + 1)
      end
    when :union
      # A union does not add a nesting level: the chosen branch keeps +d+.
      nextdata(schm.schemas.sample(random: @rng), d)
    when :enum
      # Array#sample returns nil for an empty symbol list.
      schm.symbols.sample(random: @rng)
    when :fixed
      fixed = +''
      schm.size.times { fixed << BYTEPOOL[@rng.rand(BYTEPOOL.size)] }
      fixed
    end
  end

  # Returns a random string of 0..length characters drawn from +chars+.
  def randstr(chars=CHARPOOL, length=20)
    str = +''
    @rng.rand(length + 1).times { str << chars[@rng.rand(chars.size)] }
    str
  end

  private

  # Length for an array or map at nesting depth +d+: between 2 and 6 at the
  # top level, one less per level of depth, never negative.
  def container_length(d)
    [@rng.rand(5) + 2 - d, 0].max
  end
end
@@ -0,0 +1,419 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'test_helper'
18
+
19
# Tests for Tros schema parsing, binary encoding/decoding, value skipping,
# default-value resolution, schema promotion and data-file round trips.
class IOTest < Minitest::Test
  # Scratch file written and read back by #check_datafile.
  DATAFILE = File.join(TMP_DIR, 'io_test.avro')

  def test_null
    check_default('"null"', "null", nil)
  end

  def test_boolean
    check_default('"boolean"', "true", true)
    check_default('"boolean"', "false", false)
  end

  def test_string
    check_default('"string"', '"foo"', "foo")
  end

  def test_bytes
    check_default('"bytes"', '"foo"', "foo")
  end

  def test_int
    check_default('"int"', "5", 5)
  end

  def test_long
    check_default('"long"', "9", 9)
  end

  def test_float
    check_default('"float"', "1.2", 1.2)
  end

  def test_double
    check_default('"double"', "1.2", 1.2)
  end

  def test_array
    array_schema = '{"type": "array", "items": "long"}'
    check_default(array_schema, "[1]", [1])
  end

  def test_map
    map_schema = '{"type": "map", "values": "long"}'
    check_default(map_schema, '{"a": 1}', {"a" => 1})
  end

  def test_record
    record_schema = <<-EOS
      {"type": "record",
       "name": "Test",
       "fields": [{"name": "f",
                   "type": "long"}]}
    EOS
    check_default(record_schema, '{"f": 11}', {"f" => 11})
  end

  def test_error
    error_schema = <<-EOS
      {"type": "error",
       "name": "TestError",
       "fields": [{"name": "message",
                   "type": "string"}]}
    EOS
    check_default(error_schema, '{"message": "boom"}', {"message" => "boom"})
  end

  def test_enum
    enum_schema = '{"type": "enum", "name": "Test","symbols": ["A", "B"]}'
    check_default(enum_schema, '"B"', "B")
  end

  def test_recursive
    recursive_schema = <<-EOS
      {
        "type": "record",
        "name": "Node",
        "fields": [
          { "name": "label", "type": "string"},
          { "name": "children", "type": { "type": "array", "items": "Node"} }
        ]
      }
    EOS
    check(recursive_schema)
  end

  def test_union
    union_schema = <<-EOS
      [
        "string",
        "null",
        "long",
        {
          "type": "record",
          "name": "Cons",
          "fields": [
            {"name": "car", "type": "string"},
            {"name": "cdr", "type": "string"}
          ]
        }
      ]
    EOS
    check(union_schema)
    check_default('["double", "long"]', "1.1", 1.1)
  end

  def test_lisp
    lisp_schema = <<-EOS
      {
        "type": "record",
        "name": "Lisp",
        "fields": [
          {
            "name": "value",
            "type": [
              "null",
              "string",
              {
                "type": "record",
                "name": "Cons",
                "fields": [
                  { "name": "car", "type": "Lisp" },
                  { "name": "cdr", "type": "Lisp" }
                ]
              }
            ]
          }
        ]
      }
    EOS
    check(lisp_schema)
  end

  def test_fixed
    fixed_schema = '{"type": "fixed", "name": "Test", "size": 1}'
    check_default(fixed_schema, '"a"', "a")
  end

  # An enum that lists the same symbol twice must be rejected at parse time.
  def test_enum_with_duplicate
    str = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}'
    assert_raises(Tros::SchemaParseError) do
      Tros::Schema.parse(str)
    end
  end

  # Pairs of [integer value, expected encoded bytes as space-separated hex].
  # The expected bytes are consistent with a zig-zag, variable-length
  # encoding (high bit set on every byte except the last).
  BINARY_INT_ENCODINGS = [
    [0, '00'],
    [-1, '01'],
    [1, '02'],
    [-2, '03'],
    [2, '04'],
    [-64, '7f'],
    [64, '80 01'],
    [8192, '80 80 01'],
    [-8193, '81 80 01'],
  ].freeze

  # Reads one variable-length encoded integer from +reader+ and returns its
  # bytes as space-separated hex. Reading stops after the first byte whose
  # high bit is clear, or at end of input if the buffer is truncated.
  def tros_hexlify(reader)
    bytes = []
    loop do
      current_byte = reader.read(1)
      break if current_byte.nil? || current_byte.empty?
      bytes << hexlify(current_byte)
      break if (current_byte.unpack('C').first & 0x80).zero?
    end
    bytes.join(' ')
  end

  # Returns the lowercase hex representation of +msg+ as a String.
  def hexlify(msg)
    msg.unpack("H*").first
  end

  def test_binary_int_encoding
    BINARY_INT_ENCODINGS.each do |value, hex_encoding|
      # write datum in binary to string buffer
      buffer = StringIO.new
      encoder = Tros::IO::BinaryEncoder.new(buffer)
      datum_writer = Tros::IO::DatumWriter.new(Tros::Schema.parse('"int"'))
      datum_writer.write(value, encoder)

      # read it out of the buffer and hexlify it
      buffer.seek(0)
      hex_val = tros_hexlify(buffer)

      assert_equal hex_encoding, hex_val
    end
  end

  def test_binary_long_encoding
    BINARY_INT_ENCODINGS.each do |value, hex_encoding|
      # write datum in binary to string buffer
      buffer = StringIO.new
      encoder = Tros::IO::BinaryEncoder.new(buffer)
      datum_writer = Tros::IO::DatumWriter.new(Tros::Schema.parse('"long"'))
      datum_writer.write(value, encoder)

      # read it out of the buffer and hexlify it
      buffer.seek(0)
      hex_val = tros_hexlify(buffer)

      assert_equal hex_encoding, hex_val
    end
  end

  # After skipping the first long, the decoder must be positioned exactly at
  # the second one.
  def test_skip_long
    BINARY_INT_ENCODINGS.each do |value_to_skip, _hex_encoding|
      value_to_read = 6253

      # write some data in binary to string buffer
      writer = StringIO.new
      encoder = Tros::IO::BinaryEncoder.new(writer)
      datum_writer = Tros::IO::DatumWriter.new(Tros::Schema.parse('"long"'))
      datum_writer.write(value_to_skip, encoder)
      datum_writer.write(value_to_read, encoder)

      # skip the value
      reader = StringIO.new(writer.string)
      decoder = Tros::IO::BinaryDecoder.new(reader)
      decoder.skip_long

      # read data from string buffer
      datum_reader = Tros::IO::DatumReader.new(Tros::Schema.parse('"long"'))
      read_value = datum_reader.read(decoder)

      # check it
      assert_equal value_to_read, read_value
    end
  end

  # Same as #test_skip_long, for the "int" schema and BinaryDecoder#skip_int.
  def test_skip_int
    BINARY_INT_ENCODINGS.each do |value_to_skip, _hex_encoding|
      value_to_read = 6253

      writer = StringIO.new
      encoder = Tros::IO::BinaryEncoder.new(writer)
      datum_writer = Tros::IO::DatumWriter.new(Tros::Schema.parse('"int"'))
      datum_writer.write(value_to_skip, encoder)
      datum_writer.write(value_to_read, encoder)

      reader = StringIO.new(writer.string)
      decoder = Tros::IO::BinaryDecoder.new(reader)
      decoder.skip_int

      datum_reader = Tros::IO::DatumReader.new(Tros::Schema.parse('"int"'))
      read_value = datum_reader.read(decoder)

      assert_equal value_to_read, read_value
    end
  end

  # Skipping a union value of any branch must leave the decoder positioned at
  # the following datum.
  def test_skip_union
    ["hello", -1, 32, nil].each do |value_to_skip|
      value_to_read = 6253

      schema = Tros::Schema.parse('["int", "string", "null"]')
      writer = StringIO.new
      encoder = Tros::IO::BinaryEncoder.new(writer)
      datum_writer = Tros::IO::DatumWriter.new(schema)
      datum_writer.write(value_to_skip, encoder)
      datum_writer.write(value_to_read, encoder)

      reader = StringIO.new(writer.string)
      decoder = Tros::IO::BinaryDecoder.new(reader)
      datum_reader = Tros::IO::DatumReader.new(schema)
      datum_reader.skip_data(schema, decoder)
      read_value = datum_reader.read(decoder)

      assert_equal value_to_read, read_value
    end
  end

  # A datum written with each numeric schema must read back equal when the
  # reader's schema is any schema listed after it in +promotable_schemas+.
  def test_schema_promotion
    promotable_schemas = ['"int"', '"long"', '"float"', '"double"']
    datum_to_write = 219
    promotable_schemas.each_with_index do |ws, i|
      writers_schema = Tros::Schema.parse(ws)
      promotable_schemas[(i + 1)..-1].each do |rs|
        readers_schema = Tros::Schema.parse(rs)
        writer, = write_datum(datum_to_write, writers_schema)
        datum_read = read_datum(writer, writers_schema, readers_schema)
        # Assert per pair so a failure names the schemas involved.
        assert_equal datum_to_write, datum_read, "promoting #{ws} to #{rs}"
      end
    end
  end

  private

  # Runs #check on +schema_json+, then verifies default-value resolution:
  # data written with a record schema that has no fields is read with a
  # record schema whose single field "f" has type +schema_json+ and default
  # +default_json+; the field must come back as +default_value+.
  def check_default(schema_json, default_json, default_value)
    check(schema_json)
    actual_schema = '{"type": "record", "name": "Foo", "fields": []}'
    actual = Tros::Schema.parse(actual_schema)

    expected_schema = <<-EOS
      {
        "type": "record",
        "name": "Foo",
        "fields": [
          {"name": "f", "type": #{schema_json},
           "default": #{default_json}}
        ]
      }
    EOS
    expected = Tros::Schema.parse(expected_schema)

    # The record has no written fields, so an empty input buffer is enough.
    reader = Tros::IO::DatumReader.new(actual, expected)
    record = reader.read(Tros::IO::BinaryDecoder.new(StringIO.new))
    assert_equal default_value, record["f"]
  end

  # Exercises a schema given as JSON text: string round trip, equality,
  # hashing, random-data serialization and a data-file round trip.
  def check(str)
    # parse schema, then convert back to string
    schema = Tros::Schema.parse(str)

    parsed_string = schema.to_s

    # test that the round-trip didn't mess up anything
    # NB: I don't think we should do this. Why enforce ordering?
    assert_equal JSON.parse("[#{str}]").first.to_json, parsed_string

    # test equality of two independent parses
    assert_equal schema, Tros::Schema.parse(str)

    # test hashcode doesn't generate infinite recursion
    schema.hash

    # test serialization of random data
    randomdata = RandomData.new(schema)
    9.times { check_serialization(schema, randomdata) }

    # test writing of data to file
    check_datafile(schema)
  end

  # Draws one random datum, checks that it validates against +schm+, and
  # checks that it survives a binary write/read round trip unchanged.
  def check_serialization(schm, randomdata)
    datum = randomdata.next
    assert validate(schm, datum)
    w = Tros::IO::DatumWriter.new(schm)
    writer = StringIO.new("", "w")
    w.write(datum, Tros::IO::BinaryEncoder.new(writer))
    r = datum_reader(schm)
    reader = StringIO.new(writer.string)
    ob = r.read(Tros::IO::BinaryDecoder.new(reader))
    assert_equal(datum, ob) # FIXME check on assertdata conditional
  end

  # Writes +count+ seeded random datums to DATAFILE, then reads the file back
  # and compares each record against a generator restarted from the same seed.
  def check_datafile(schm)
    seed = 0
    count = 10

    random_data = RandomData.new(schm, seed)
    # Block form guarantees the handle is released even if a write raises.
    File.open(DATAFILE, 'wb') do |f|
      dw = Tros::DataFile::Writer.new(f, datum_writer(schm), schm)
      count.times { dw << random_data.next }
      dw.close
    end

    random_data = RandomData.new(schm, seed)
    records_read = 0
    # Binary read-only mode: the file holds binary data and is only read.
    File.open(DATAFILE, 'rb') do |f|
      dr = Tros::DataFile::Reader.new(f, datum_reader(schm))
      dr.each do |data|
        # FIXME assertdata conditional
        assert_equal(random_data.next, data)
        records_read += 1
      end
      dr.close
    end
    # Counting explicitly also reports a clean failure when nothing was read.
    assert_equal count, records_read
  end

  def validate(schm, datum)
    Tros::Schema.validate(schm, datum)
  end

  def datum_writer(schm)
    Tros::IO::DatumWriter.new(schm)
  end

  def datum_reader(schm)
    Tros::IO::DatumReader.new(schm)
  end

  # Encodes +datum+ with +writers_schema+ into a fresh StringIO.
  # Returns [buffer, encoder, datum_writer].
  def write_datum(datum, writers_schema)
    writer = StringIO.new
    encoder = Tros::IO::BinaryEncoder.new(writer)
    datum_writer = Tros::IO::DatumWriter.new(writers_schema)
    datum_writer.write(datum, encoder)
    [writer, encoder, datum_writer]
  end

  # Decodes one datum from the contents of +buffer+ (a StringIO), resolving
  # +writers_schema+ against +readers_schema+ when the latter is given.
  def read_datum(buffer, writers_schema, readers_schema=nil)
    reader = StringIO.new(buffer.string)
    decoder = Tros::IO::BinaryDecoder.new(reader)
    datum_reader = Tros::IO::DatumReader.new(writers_schema, readers_schema)
    datum_reader.read(decoder)
  end
end