tros 1.7.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/.travis.yml +13 -0
- data/Gemfile +17 -0
- data/Gemfile.lock +18 -0
- data/README.md +18 -0
- data/Rakefile +25 -0
- data/lib/tros.rb +39 -0
- data/lib/tros/data_file.rb +342 -0
- data/lib/tros/io.rb +610 -0
- data/lib/tros/ipc.rb +550 -0
- data/lib/tros/protocol.rb +161 -0
- data/lib/tros/schema.rb +405 -0
- data/lib/tros/version.rb +3 -0
- data/test/datafile_test.rb +193 -0
- data/test/fixtures/schemas/org/apache/avro/data/Json.avsc +15 -0
- data/test/fixtures/schemas/org/apache/avro/ipc/HandshakeRequest.avsc +11 -0
- data/test/fixtures/schemas/org/apache/avro/ipc/HandshakeResponse.avsc +15 -0
- data/test/fixtures/schemas/org/apache/avro/ipc/trace/avroTrace.avdl +68 -0
- data/test/fixtures/schemas/org/apache/avro/ipc/trace/avroTrace.avpr +82 -0
- data/test/fixtures/schemas/org/apache/avro/mapred/tether/InputProtocol.avpr +64 -0
- data/test/fixtures/schemas/org/apache/avro/mapred/tether/OutputProtocol.avpr +82 -0
- data/test/helpers/random_data.rb +90 -0
- data/test/io_test.rb +419 -0
- data/test/protocol_test.rb +195 -0
- data/test/sample_ipc_client.rb +85 -0
- data/test/sample_ipc_http_client.rb +84 -0
- data/test/sample_ipc_http_server.rb +79 -0
- data/test/sample_ipc_server.rb +92 -0
- data/test/schema_test.rb +135 -0
- data/test/socket_transport_test.rb +40 -0
- data/test/test_helper.rb +26 -0
- data/test/tool.rb +144 -0
- data/tros.gemspec +32 -0
- metadata +137 -0
data/test/datafile_test.rb
ADDED
@@ -0,0 +1,193 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
4
|
+
# distributed with this work for additional information
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
7
|
+
# "License"); you may not use this file except in compliance
|
8
|
+
# with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
# See the License for the specific language governing permissions and
|
16
|
+
# limitations under the License.
|
17
|
+
|
18
|
+
require 'test_helper'
|
19
|
+
|
20
|
+
# Round-trip tests for Tros::DataFile: reading with a reader schema that
# differs from the writer schema, writing past the sync interval, UTF-8
# payloads, and the deflate codec (including appending to a deflated file).
class DataFileTest < Minitest::Test
  TEST_FILE = File.join(TMP_DIR, 'data_file_test.avro')

  # Every test starts without a data file left over from a previous run.
  def setup
    remove_test_file
  end

  # Remove whatever the test wrote.
  def teardown
    remove_test_file
  end

  # Writes records with three primitive fields and reads them back with a
  # reader schema that declares only "username".
  def test_differing_schemas_with_primitives
    # The default of a boolean field must be a JSON boolean, not the string
    # "false".
    writer_schema = <<-JSON
      {
        "type": "record",
        "name": "User",
        "fields" : [
          {"name": "username", "type": "string"},
          {"name": "age", "type": "int"},
          {"name": "verified", "type": "boolean", "default": false}
        ]
      }
    JSON

    data = [{"username" => "john", "age" => 25, "verified" => true},
            {"username" => "ryan", "age" => 23, "verified" => false}]

    Tros::DataFile.open(TEST_FILE, 'w', writer_schema) do |dw|
      data.each { |h| dw << h }
    end

    # extract the username only from the tros serialized file
    reader_schema = <<-JSON
      {
        "type": "record",
        "name": "User",
        "fields" : [
          {"name": "username", "type": "string"}
        ]
      }
    JSON

    Tros::DataFile.open(TEST_FILE, 'r', reader_schema) do |dr|
      dr.each_with_index do |record, i|
        assert_equal data[i]['username'], record['username']
      end
    end
  end

  # Writes records containing fixed/enum/array/map/record fields, then reads
  # the file once per complex type name with that kind of field removed from
  # the reader schema, checking that every remaining field round-trips.
  def test_differing_schemas_with_complex_objects
    writer_schema = <<-JSON
      {
        "type": "record",
        "name": "something",
        "fields": [
          {"name": "something_fixed", "type": {"name": "inner_fixed",
                                               "type": "fixed", "size": 3}},
          {"name": "something_enum", "type": {"name": "inner_enum",
                                              "type": "enum",
                                              "symbols": ["hello", "goodbye"]}},
          {"name": "something_array", "type": {"type": "array", "items": "int"}},
          {"name": "something_map", "type": {"type": "map", "values": "int"}},
          {"name": "something_record", "type": {"name": "inner_record",
                                                "type": "record",
                                                "fields": [
                                                  {"name": "inner", "type": "int"}
                                                ]}},
          {"name": "username", "type": "string"}
        ]
      }
    JSON

    # "something_error" has no matching field in the writer schema above.
    data = [{"username" => "john",
             "something_fixed" => "foo",
             "something_enum" => "hello",
             "something_array" => [1,2,3],
             "something_map" => {"a" => 1, "b" => 2},
             "something_record" => {"inner" => 2},
             "something_error" => {"code" => 403}
            },
            {"username" => "ryan",
             "something_fixed" => "bar",
             "something_enum" => "goodbye",
             "something_array" => [1,2,3],
             "something_map" => {"a" => 2, "b" => 6},
             "something_record" => {"inner" => 1},
             "something_error" => {"code" => 401}
            }]

    Tros::DataFile.open(TEST_FILE, 'w', writer_schema) do |dw|
      data.each { |d| dw << d }
    end

    # No field in the writer schema has type "error" or "union", so those two
    # iterations read the file back with the complete field list.
    %w[fixed enum record error array map union].each do |s|
      reader = JSON.load(writer_schema)
      # For the plain "string" field, f['type'] is a String, so ['type'] is a
      # substring lookup that yields nil and the field is always kept.
      reader['fields'] = reader['fields'].reject { |f| f['type']['type'] == s }
      Tros::DataFile.open(TEST_FILE, 'r', JSON.dump(reader)) do |dr|
        dr.each_with_index do |obj, i|
          reader['fields'].each do |field|
            assert_equal data[i][field['name']], obj[field['name']]
          end
        end
      end
    end
  end

  # Appends records until the underlying writer position reaches
  # SYNC_INTERVAL, then checks that one more record raises block_count by
  # exactly one.
  def test_data_writer_handles_sync_interval
    writer_schema = <<-JSON
      {
        "type": "record",
        "name": "something",
        "fields": [
          {"name": "something_boolean", "type": "boolean"}
        ]
      }
    JSON

    data = {"something_boolean" => true }

    Tros::DataFile.open(TEST_FILE, 'w', writer_schema) do |dw|
      while dw.writer.tell < Tros::DataFile::SYNC_INTERVAL
        dw << data
      end
      block_count = dw.block_count
      dw << data
      # ensure we didn't just write another block
      assert_equal(block_count+1, dw.block_count)
    end
  end

  # Round-trips a single non-ASCII string using the non-block form of
  # Tros::DataFile.open with explicit close calls.
  def test_utf8
    datafile = Tros::DataFile.open(TEST_FILE, 'w', '"string"')
    datafile << "家"
    datafile.close

    datafile = Tros::DataFile.open(TEST_FILE)
    datafile.each do |s|
      assert_equal "家", s
    end
    datafile.close
  end

  # Writes 10,000 identical characters with the :deflate codec, checks the
  # file stays under 500 bytes, and reads the record back unchanged.
  def test_deflate
    Tros::DataFile.open(TEST_FILE, 'w', '"string"', :deflate) do |writer|
      writer << 'a' * 10_000
    end
    assert(File.size(TEST_FILE) < 500)

    records = []
    Tros::DataFile.open(TEST_FILE) do |reader|
      reader.each { |record| records << record }
    end
    # Minitest's assert_equal takes (expected, actual).
    assert_equal ['a' * 10_000], records
  end

  # Creates a deflated file, reopens it in append mode without passing a
  # schema or codec, and checks that both records read back and that the file
  # stays under 1,000 bytes.
  def test_append_to_deflated_file
    schema = Tros::Schema.parse('"string"')
    writer = Tros::IO::DatumWriter.new(schema)
    file = Tros::DataFile::Writer.new(File.open(TEST_FILE, 'wb'), writer, schema, :deflate)
    file << 'a' * 10_000
    file.close

    file = Tros::DataFile::Writer.new(File.open(TEST_FILE, 'a+b'), writer)
    file << 'b' * 10_000
    file.close
    assert(File.size(TEST_FILE) < 1_000)

    records = []
    Tros::DataFile.open(TEST_FILE) do |reader|
      reader.each { |record| records << record }
    end
    # Minitest's assert_equal takes (expected, actual).
    assert_equal ['a' * 10_000, 'b' * 10_000], records
  end

  private

  # Deletes the shared test file when present. Uses File.exist? because
  # File.exists? was removed in Ruby 3.2.
  def remove_test_file
    File.unlink(TEST_FILE) if File.exist?(TEST_FILE)
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
{"type": "record", "name": "Json", "namespace":"org.apache.avro.data",
|
2
|
+
"fields": [
|
3
|
+
{"name": "value",
|
4
|
+
"type": [
|
5
|
+
"long",
|
6
|
+
"double",
|
7
|
+
"string",
|
8
|
+
"boolean",
|
9
|
+
"null",
|
10
|
+
{"type": "array", "items": "Json"},
|
11
|
+
{"type": "map", "values": "Json"}
|
12
|
+
]
|
13
|
+
}
|
14
|
+
]
|
15
|
+
}
|
@@ -0,0 +1,11 @@
|
|
1
|
+
{
|
2
|
+
"type": "record",
|
3
|
+
"name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
|
4
|
+
"fields": [
|
5
|
+
{"name": "clientHash",
|
6
|
+
"type": {"type": "fixed", "name": "MD5", "size": 16}},
|
7
|
+
{"name": "clientProtocol", "type": ["null", "string"]},
|
8
|
+
{"name": "serverHash", "type": "MD5"},
|
9
|
+
{"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
|
10
|
+
]
|
11
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
{
|
2
|
+
"type": "record",
|
3
|
+
"name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
|
4
|
+
"fields": [
|
5
|
+
{"name": "match",
|
6
|
+
"type": {"type": "enum", "name": "HandshakeMatch",
|
7
|
+
"symbols": ["BOTH", "CLIENT", "NONE"]}},
|
8
|
+
{"name": "serverProtocol",
|
9
|
+
"type": ["null", "string"]},
|
10
|
+
{"name": "serverHash",
|
11
|
+
"type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
|
12
|
+
{"name": "meta",
|
13
|
+
"type": ["null", {"type": "map", "values": "bytes"}]}
|
14
|
+
]
|
15
|
+
}
|
@@ -0,0 +1,68 @@
|
|
1
|
+
/**
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
* See the License for the specific language governing permissions and
|
16
|
+
* limitations under the License.
|
17
|
+
*/
|
18
|
+
|
19
|
+
/**
|
20
|
+
* A Span is our basic unit of tracing. It tracks the critical points
|
21
|
+
* of a single RPC call and records other call meta-data. It also
|
22
|
+
* allows arbitrary string annotations. Both the client and server create
|
23
|
+
* Span objects, each of which is populated with half of the relevant event
|
24
|
+
* data. They share a span ID, which allows us to merge them into one complete
|
25
|
+
* span later on.
|
26
|
+
*/
|
27
|
+
@namespace("org.apache.avro.ipc.trace")
|
28
|
+
|
29
|
+
protocol AvroTrace {
|
30
|
+
enum SpanEvent { SERVER_RECV, SERVER_SEND, CLIENT_RECV, CLIENT_SEND }
|
31
|
+
|
32
|
+
fixed ID(8);
|
33
|
+
|
34
|
+
record TimestampedEvent {
|
35
|
+
long timeStamp; // Unix time, in nanoseconds
|
36
|
+
union { SpanEvent, string} event;
|
37
|
+
}
|
38
|
+
|
39
|
+
/**
|
40
|
+
* An individual span is the basic unit of tracing.
|
41
|
+
* The record is used by both \"client\" and \"server\".
|
42
|
+
*/
|
43
|
+
record Span {
|
44
|
+
ID traceID; // ID shared by all Spans in a given trace
|
45
|
+
ID spanID; // Random ID for this Span
|
46
|
+
union { ID, null } parentSpanID; // Parent Span ID (null if root Span)
|
47
|
+
string messageName; // Function call represented
|
48
|
+
long requestPayloadSize; // Size (bytes) of the request
|
49
|
+
long responsePayloadSize; // Size (bytes) of the response
|
50
|
+
union { string, null} requestorHostname; // Hostname of requestor
|
51
|
+
// int requestorPort; // Port of the requestor (currently unused)
|
52
|
+
union { string, null } responderHostname; // Hostname of the responder
|
53
|
+
// int responderPort; // Port of the responder (currently unused)
|
54
|
+
array<TimestampedEvent> events; // List of critical events
|
55
|
+
boolean complete; // Whether includes data from both sides
|
56
|
+
}
|
57
|
+
|
58
|
+
/**
|
59
|
+
* Get all spans stored on this host.
|
60
|
+
*/
|
61
|
+
array<Span> getAllSpans();
|
62
|
+
|
63
|
+
/**
|
64
|
+
* Get spans occurring between start and end. Each is a unix timestamp
|
65
|
+
* in nanosecond units (for consistency with TimestampedEvent).
|
66
|
+
*/
|
67
|
+
array<Span> getSpansInRange(long start, long end);
|
68
|
+
}
|
@@ -0,0 +1,82 @@
|
|
1
|
+
{
|
2
|
+
"protocol" : "AvroTrace",
|
3
|
+
"namespace" : "org.apache.avro.ipc.trace",
|
4
|
+
"types" : [ {
|
5
|
+
"type" : "enum",
|
6
|
+
"name" : "SpanEvent",
|
7
|
+
"symbols" : [ "SERVER_RECV", "SERVER_SEND", "CLIENT_RECV", "CLIENT_SEND" ]
|
8
|
+
}, {
|
9
|
+
"type" : "fixed",
|
10
|
+
"name" : "ID",
|
11
|
+
"size" : 8
|
12
|
+
}, {
|
13
|
+
"type" : "record",
|
14
|
+
"name" : "TimestampedEvent",
|
15
|
+
"fields" : [ {
|
16
|
+
"name" : "timeStamp",
|
17
|
+
"type" : "long"
|
18
|
+
}, {
|
19
|
+
"name" : "event",
|
20
|
+
"type" : [ "SpanEvent", "string" ]
|
21
|
+
} ]
|
22
|
+
}, {
|
23
|
+
"type" : "record",
|
24
|
+
"name" : "Span",
|
25
|
+
"fields" : [ {
|
26
|
+
"name" : "traceID",
|
27
|
+
"type" : "ID"
|
28
|
+
}, {
|
29
|
+
"name" : "spanID",
|
30
|
+
"type" : "ID"
|
31
|
+
}, {
|
32
|
+
"name" : "parentSpanID",
|
33
|
+
"type" : [ "ID", "null" ]
|
34
|
+
}, {
|
35
|
+
"name" : "messageName",
|
36
|
+
"type" : "string"
|
37
|
+
}, {
|
38
|
+
"name" : "requestPayloadSize",
|
39
|
+
"type" : "long"
|
40
|
+
}, {
|
41
|
+
"name" : "responsePayloadSize",
|
42
|
+
"type" : "long"
|
43
|
+
}, {
|
44
|
+
"name" : "requestorHostname",
|
45
|
+
"type" : [ "string", "null" ]
|
46
|
+
}, {
|
47
|
+
"name" : "responderHostname",
|
48
|
+
"type" : [ "string", "null" ]
|
49
|
+
}, {
|
50
|
+
"name" : "events",
|
51
|
+
"type" : {
|
52
|
+
"type" : "array",
|
53
|
+
"items" : "TimestampedEvent"
|
54
|
+
}
|
55
|
+
}, {
|
56
|
+
"name" : "complete",
|
57
|
+
"type" : "boolean"
|
58
|
+
} ]
|
59
|
+
} ],
|
60
|
+
"messages" : {
|
61
|
+
"getAllSpans" : {
|
62
|
+
"request" : [ ],
|
63
|
+
"response" : {
|
64
|
+
"type" : "array",
|
65
|
+
"items" : "Span"
|
66
|
+
}
|
67
|
+
},
|
68
|
+
"getSpansInRange" : {
|
69
|
+
"request" : [ {
|
70
|
+
"name" : "start",
|
71
|
+
"type" : "long"
|
72
|
+
}, {
|
73
|
+
"name" : "end",
|
74
|
+
"type" : "long"
|
75
|
+
} ],
|
76
|
+
"response" : {
|
77
|
+
"type" : "array",
|
78
|
+
"items" : "Span"
|
79
|
+
}
|
80
|
+
}
|
81
|
+
}
|
82
|
+
}
|
@@ -0,0 +1,64 @@
|
|
1
|
+
{"namespace":"org.apache.avro.mapred.tether",
|
2
|
+
"protocol": "InputProtocol",
|
3
|
+
"doc": "Transmit inputs to a map or reduce task sub-process.",
|
4
|
+
|
5
|
+
"types": [
|
6
|
+
{"name": "TaskType", "type": "enum", "symbols": ["MAP","REDUCE"]}
|
7
|
+
],
|
8
|
+
|
9
|
+
"messages": {
|
10
|
+
|
11
|
+
"configure": {
|
12
|
+
"doc": "Configure the task. Sent before any other message.",
|
13
|
+
"request": [
|
14
|
+
{"name": "taskType", "type": "TaskType",
|
15
|
+
"doc": "Whether this is a map or reduce task."},
|
16
|
+
{"name": "inSchema", "type": "string",
|
17
|
+
"doc": "The Avro schema for task input data."},
|
18
|
+
{"name": "outSchema", "type": "string",
|
19
|
+
"doc": "The Avro schema for task output data."}
|
20
|
+
],
|
21
|
+
"response": "null",
|
22
|
+
"one-way": true
|
23
|
+
},
|
24
|
+
|
25
|
+
"partitions": {
|
26
|
+
"doc": "Set the number of map output partitions.",
|
27
|
+
"request": [
|
28
|
+
{"name": "partitions", "type": "int",
|
29
|
+
"doc": "The number of map output partitions."}
|
30
|
+
],
|
31
|
+
"response": "null",
|
32
|
+
"one-way": true
|
33
|
+
},
|
34
|
+
|
35
|
+
"input": {
|
36
|
+
"doc": "Send a block of input data to a task.",
|
37
|
+
"request": [
|
38
|
+
{"name": "data", "type": "bytes",
|
39
|
+
"doc": "A sequence of instances of the declared schema."},
|
40
|
+
{"name": "count", "type": "long",
|
41
|
+
"default": 1,
|
42
|
+
"doc": "The number of instances in this block."}
|
43
|
+
],
|
44
|
+
"response": "null",
|
45
|
+
"one-way": true
|
46
|
+
},
|
47
|
+
|
48
|
+
"abort": {
|
49
|
+
"doc": "Called to abort the task.",
|
50
|
+
"request": [],
|
51
|
+
"response": "null",
|
52
|
+
"one-way": true
|
53
|
+
},
|
54
|
+
|
55
|
+
"complete": {
|
56
|
+
"doc": "Called when a task's input is complete.",
|
57
|
+
"request": [],
|
58
|
+
"response": "null",
|
59
|
+
"one-way": true
|
60
|
+
}
|
61
|
+
|
62
|
+
}
|
63
|
+
|
64
|
+
}
|