tros 1.7.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,3 @@
1
module Tros
  # Version string of the gem. Frozen so that the shared constant cannot be
  # mutated in place by code that reads it.
  VERSION = "1.7.6.1".freeze
end
@@ -0,0 +1,193 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ require 'test_helper'
19
+
20
# Round-trip tests for Tros::DataFile. Each test writes records to a scratch
# file (TEST_FILE) and reads them back, optionally through a different reader
# schema or with the :deflate codec argument.
class DataFileTest < Minitest::Test
  TEST_FILE = File.join(TMP_DIR, 'data_file_test.avro')

  # Make sure no data file from an earlier run is lying around.
  def setup
    remove_test_file
  end

  # Delete whatever the test wrote.
  def teardown
    remove_test_file
  end

  # Writes records with a three-field schema and reads them back with a
  # reader schema that only declares "username".
  def test_differing_schemas_with_primitives
    # The default of a boolean field is the JSON boolean false, not the
    # string "false".
    writer_schema = <<-JSON
      {
        "type": "record",
        "name": "User",
        "fields" : [
          {"name": "username", "type": "string"},
          {"name": "age", "type": "int"},
          {"name": "verified", "type": "boolean", "default": false}
        ]
      }
    JSON

    data = [{"username" => "john", "age" => 25, "verified" => true},
            {"username" => "ryan", "age" => 23, "verified" => false}]

    Tros::DataFile.open(TEST_FILE, 'w', writer_schema) do |dw|
      data.each { |row| dw << row }
    end

    # extract the username only from the tros serialized file
    reader_schema = <<-JSON
      {
        "type": "record",
        "name": "User",
        "fields" : [
          {"name": "username", "type": "string"}
        ]
      }
    JSON

    # Compare whole lists so that reading back zero records fails the test
    # instead of silently skipping every assertion.
    usernames = read_records(reader_schema).map { |record| record['username'] }
    assert_equal data.map { |row| row['username'] }, usernames
  end

  # Writes records containing fixed, enum, array, map and nested record
  # fields, then re-reads the file once per complex type name with every
  # field of that type removed from the reader schema.
  def test_differing_schemas_with_complex_objects
    writer_schema = <<-JSON
      {
        "type": "record",
        "name": "something",
        "fields": [
          {"name": "something_fixed", "type": {"name": "inner_fixed",
                                               "type": "fixed", "size": 3}},
          {"name": "something_enum", "type": {"name": "inner_enum",
                                              "type": "enum",
                                              "symbols": ["hello", "goodbye"]}},
          {"name": "something_array", "type": {"type": "array", "items": "int"}},
          {"name": "something_map", "type": {"type": "map", "values": "int"}},
          {"name": "something_record", "type": {"name": "inner_record",
                                                "type": "record",
                                                "fields": [
                                                  {"name": "inner", "type": "int"}
                                                ]}},
          {"name": "username", "type": "string"}
        ]
      }
    JSON

    # "something_error" has no counterpart in the writer schema above.
    data = [{"username" => "john",
             "something_fixed" => "foo",
             "something_enum" => "hello",
             "something_array" => [1,2,3],
             "something_map" => {"a" => 1, "b" => 2},
             "something_record" => {"inner" => 2},
             "something_error" => {"code" => 403}
            },
            {"username" => "ryan",
             "something_fixed" => "bar",
             "something_enum" => "goodbye",
             "something_array" => [1,2,3],
             "something_map" => {"a" => 2, "b" => 6},
             "something_record" => {"inner" => 1},
             "something_error" => {"code" => 401}
            }]

    Tros::DataFile.open(TEST_FILE, 'w', writer_schema) do |dw|
      data.each { |row| dw << row }
    end

    %w[fixed enum record error array map union].each do |type_name|
      reader = JSON.parse(writer_schema)
      # A field's "type" is either a plain type name (String) or a nested
      # definition (Hash); only the latter can carry the complex type name.
      reader['fields'] = reader['fields'].reject do |field|
        field['type'].is_a?(Hash) && field['type']['type'] == type_name
      end

      records = read_records(JSON.dump(reader))
      # Guard against a vacuous pass when nothing is read back.
      assert_equal data.size, records.size

      records.each_with_index do |obj, i|
        reader['fields'].each do |field|
          assert_equal data[i][field['name']], obj[field['name']]
        end
      end
    end
  end

  # Fills the file until the underlying writer position reaches
  # Tros::DataFile::SYNC_INTERVAL, then checks that one more append raises
  # block_count by exactly one.
  def test_data_writer_handles_sync_interval
    writer_schema = <<-JSON
      {
        "type": "record",
        "name": "something",
        "fields": [
          {"name": "something_boolean", "type": "boolean"}
        ]
      }
    JSON

    data = {"something_boolean" => true }

    Tros::DataFile.open(TEST_FILE, 'w', writer_schema) do |dw|
      while dw.writer.tell < Tros::DataFile::SYNC_INTERVAL
        dw << data
      end
      block_count = dw.block_count
      dw << data
      # ensure we didn't just write another block
      # NOTE(review): presumably block_count is the number of records
      # buffered in the current block -- confirm against the writer.
      assert_equal(block_count+1, dw.block_count)
    end
  end

  # A multi-byte UTF-8 string must survive a write/read round trip.
  def test_utf8
    # Block form closes the file even when an assertion raises.
    Tros::DataFile.open(TEST_FILE, 'w', '"string"') do |datafile|
      datafile << "家"
    end

    # Asserting on the full list also fails when nothing was read back.
    assert_equal ["家"], read_records
  end

  # A 10,000-character run of 'a' written with :deflate must produce a file
  # smaller than 500 bytes and still read back unchanged.
  def test_deflate
    Tros::DataFile.open(TEST_FILE, 'w', '"string"', :deflate) do |writer|
      writer << 'a' * 10_000
    end
    assert_operator File.size(TEST_FILE), :<, 500

    assert_equal ['a' * 10_000], read_records
  end

  # Appends a second record to a file created with :deflate, using a Writer
  # that is given neither a schema nor a codec, and checks that the file
  # stays under 1,000 bytes and both records read back.
  def test_append_to_deflated_file
    schema = Tros::Schema.parse('"string"')
    datum_writer = Tros::IO::DatumWriter.new(schema)
    file = Tros::DataFile::Writer.new(File.open(TEST_FILE, 'wb'), datum_writer, schema, :deflate)
    file << 'a' * 10_000
    file.close

    file = Tros::DataFile::Writer.new(File.open(TEST_FILE, 'a+b'), datum_writer)
    file << 'b' * 10_000
    file.close
    assert_operator File.size(TEST_FILE), :<, 1_000

    assert_equal ['a' * 10_000, 'b' * 10_000], read_records
  end

  private

  # Deletes TEST_FILE if present. File.exists? was removed in Ruby 3.2, so
  # File.exist? is used.
  def remove_test_file
    File.unlink(TEST_FILE) if File.exist?(TEST_FILE)
  end

  # Reads every record from TEST_FILE into an Array. When reader_schema (a
  # JSON string) is given it is passed to Tros::DataFile.open together with
  # mode 'r'; otherwise open is called with the path only.
  def read_records(reader_schema = nil)
    open_args = reader_schema ? [TEST_FILE, 'r', reader_schema] : [TEST_FILE]
    records = []
    Tros::DataFile.open(*open_args) do |reader|
      reader.each { |record| records << record }
    end
    records
  end
end
@@ -0,0 +1,15 @@
1
+ {"type": "record", "name": "Json", "namespace":"org.apache.avro.data",
2
+ "fields": [
3
+ {"name": "value",
4
+ "type": [
5
+ "long",
6
+ "double",
7
+ "string",
8
+ "boolean",
9
+ "null",
10
+ {"type": "array", "items": "Json"},
11
+ {"type": "map", "values": "Json"}
12
+ ]
13
+ }
14
+ ]
15
+ }
@@ -0,0 +1,11 @@
1
+ {
2
+ "type": "record",
3
+ "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
4
+ "fields": [
5
+ {"name": "clientHash",
6
+ "type": {"type": "fixed", "name": "MD5", "size": 16}},
7
+ {"name": "clientProtocol", "type": ["null", "string"]},
8
+ {"name": "serverHash", "type": "MD5"},
9
+ {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
10
+ ]
11
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "type": "record",
3
+ "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
4
+ "fields": [
5
+ {"name": "match",
6
+ "type": {"type": "enum", "name": "HandshakeMatch",
7
+ "symbols": ["BOTH", "CLIENT", "NONE"]}},
8
+ {"name": "serverProtocol",
9
+ "type": ["null", "string"]},
10
+ {"name": "serverHash",
11
+ "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
12
+ {"name": "meta",
13
+ "type": ["null", {"type": "map", "values": "bytes"}]}
14
+ ]
15
+ }
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ /**
20
+ * A Span is our basic unit of tracing. It tracks the critical points
21
+ * of a single RPC call and records other call meta-data. It also
22
+ * allows arbitrary string annotations. Both the client and server create
23
+ * Span objects, each of which is populated with half of the relevant event
24
+ * data. They share a span ID, which allows us to merge them into one complete
25
+ * span later on.
26
+ */
27
+ @namespace("org.apache.avro.ipc.trace")
28
+
29
+ protocol AvroTrace {
30
+ enum SpanEvent { SERVER_RECV, SERVER_SEND, CLIENT_RECV, CLIENT_SEND }
31
+
32
+ fixed ID(8);
33
+
34
+ record TimestampedEvent {
35
+ long timeStamp; // Unix time, in nanoseconds
36
+ union { SpanEvent, string} event;
37
+ }
38
+
39
+ /**
40
+ * An individual span is the basic unit of tracing.
41
+ * The record is used by both \"client\" and \"server\".
42
+ */
43
+ record Span {
44
+ ID traceID; // ID shared by all Spans in a given trace
45
+ ID spanID; // Random ID for this Span
46
+ union { ID, null } parentSpanID; // Parent Span ID (null if root Span)
47
+ string messageName; // Function call represented
48
+ long requestPayloadSize; // Size (bytes) of the request
49
+ long responsePayloadSize; // Size (bytes) of the response
50
+ union { string, null} requestorHostname; // Hostname of requestor
51
+ // int requestorPort; // Port of the requestor (currently unused)
52
+ union { string, null } responderHostname; // Hostname of the responder
53
+ // int responderPort; // Port of the responder (currently unused)
54
+ array<TimestampedEvent> events; // List of critical events
55
+ boolean complete; // Whether includes data from both sides
56
+ }
57
+
58
+ /**
59
+ * Get all spans stored on this host.
60
+ */
61
+ array<Span> getAllSpans();
62
+
63
+ /**
64
+ * Get spans occurring between start and end. Each is a unix timestamp
65
+ * in nanosecond units (for consistency with TimestampedEvent).
66
+ */
67
+ array<Span> getSpansInRange(long start, long end);
68
+ }
@@ -0,0 +1,82 @@
1
+ {
2
+ "protocol" : "AvroTrace",
3
+ "namespace" : "org.apache.avro.ipc.trace",
4
+ "types" : [ {
5
+ "type" : "enum",
6
+ "name" : "SpanEvent",
7
+ "symbols" : [ "SERVER_RECV", "SERVER_SEND", "CLIENT_RECV", "CLIENT_SEND" ]
8
+ }, {
9
+ "type" : "fixed",
10
+ "name" : "ID",
11
+ "size" : 8
12
+ }, {
13
+ "type" : "record",
14
+ "name" : "TimestampedEvent",
15
+ "fields" : [ {
16
+ "name" : "timeStamp",
17
+ "type" : "long"
18
+ }, {
19
+ "name" : "event",
20
+ "type" : [ "SpanEvent", "string" ]
21
+ } ]
22
+ }, {
23
+ "type" : "record",
24
+ "name" : "Span",
25
+ "fields" : [ {
26
+ "name" : "traceID",
27
+ "type" : "ID"
28
+ }, {
29
+ "name" : "spanID",
30
+ "type" : "ID"
31
+ }, {
32
+ "name" : "parentSpanID",
33
+ "type" : [ "ID", "null" ]
34
+ }, {
35
+ "name" : "messageName",
36
+ "type" : "string"
37
+ }, {
38
+ "name" : "requestPayloadSize",
39
+ "type" : "long"
40
+ }, {
41
+ "name" : "responsePayloadSize",
42
+ "type" : "long"
43
+ }, {
44
+ "name" : "requestorHostname",
45
+ "type" : [ "string", "null" ]
46
+ }, {
47
+ "name" : "responderHostname",
48
+ "type" : [ "string", "null" ]
49
+ }, {
50
+ "name" : "events",
51
+ "type" : {
52
+ "type" : "array",
53
+ "items" : "TimestampedEvent"
54
+ }
55
+ }, {
56
+ "name" : "complete",
57
+ "type" : "boolean"
58
+ } ]
59
+ } ],
60
+ "messages" : {
61
+ "getAllSpans" : {
62
+ "request" : [ ],
63
+ "response" : {
64
+ "type" : "array",
65
+ "items" : "Span"
66
+ }
67
+ },
68
+ "getSpansInRange" : {
69
+ "request" : [ {
70
+ "name" : "start",
71
+ "type" : "long"
72
+ }, {
73
+ "name" : "end",
74
+ "type" : "long"
75
+ } ],
76
+ "response" : {
77
+ "type" : "array",
78
+ "items" : "Span"
79
+ }
80
+ }
81
+ }
82
+ }
@@ -0,0 +1,64 @@
1
+ {"namespace":"org.apache.avro.mapred.tether",
2
+ "protocol": "InputProtocol",
3
+ "doc": "Transmit inputs to a map or reduce task sub-process.",
4
+
5
+ "types": [
6
+ {"name": "TaskType", "type": "enum", "symbols": ["MAP","REDUCE"]}
7
+ ],
8
+
9
+ "messages": {
10
+
11
+ "configure": {
12
+ "doc": "Configure the task. Sent before any other message.",
13
+ "request": [
14
+ {"name": "taskType", "type": "TaskType",
15
+ "doc": "Whether this is a map or reduce task."},
16
+ {"name": "inSchema", "type": "string",
17
+ "doc": "The Avro schema for task input data."},
18
+ {"name": "outSchema", "type": "string",
19
+ "doc": "The Avro schema for task output data."}
20
+ ],
21
+ "response": "null",
22
+ "one-way": true
23
+ },
24
+
25
+ "partitions": {
26
+ "doc": "Set the number of map output partitions.",
27
+ "request": [
28
+ {"name": "partitions", "type": "int",
29
+ "doc": "The number of map output partitions."}
30
+ ],
31
+ "response": "null",
32
+ "one-way": true
33
+ },
34
+
35
+ "input": {
36
+ "doc": "Send a block of input data to a task.",
37
+ "request": [
38
+ {"name": "data", "type": "bytes",
39
+ "doc": "A sequence of instances of the declared schema."},
40
+ {"name": "count", "type": "long",
41
+ "default": 1,
42
+ "doc": "The number of instances in this block."}
43
+ ],
44
+ "response": "null",
45
+ "one-way": true
46
+ },
47
+
48
+ "abort": {
49
+ "doc": "Called to abort the task.",
50
+ "request": [],
51
+ "response": "null",
52
+ "one-way": true
53
+ },
54
+
55
+ "complete": {
56
+ "doc": "Called when a task's input is complete.",
57
+ "request": [],
58
+ "response": "null",
59
+ "one-way": true
60
+ }
61
+
62
+ }
63
+
64
+ }