tros 1.7.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/.travis.yml +13 -0
- data/Gemfile +17 -0
- data/Gemfile.lock +18 -0
- data/README.md +18 -0
- data/Rakefile +25 -0
- data/lib/tros.rb +39 -0
- data/lib/tros/data_file.rb +342 -0
- data/lib/tros/io.rb +610 -0
- data/lib/tros/ipc.rb +550 -0
- data/lib/tros/protocol.rb +161 -0
- data/lib/tros/schema.rb +405 -0
- data/lib/tros/version.rb +3 -0
- data/test/datafile_test.rb +193 -0
- data/test/fixtures/schemas/org/apache/avro/data/Json.avsc +15 -0
- data/test/fixtures/schemas/org/apache/avro/ipc/HandshakeRequest.avsc +11 -0
- data/test/fixtures/schemas/org/apache/avro/ipc/HandshakeResponse.avsc +15 -0
- data/test/fixtures/schemas/org/apache/avro/ipc/trace/avroTrace.avdl +68 -0
- data/test/fixtures/schemas/org/apache/avro/ipc/trace/avroTrace.avpr +82 -0
- data/test/fixtures/schemas/org/apache/avro/mapred/tether/InputProtocol.avpr +64 -0
- data/test/fixtures/schemas/org/apache/avro/mapred/tether/OutputProtocol.avpr +82 -0
- data/test/helpers/random_data.rb +90 -0
- data/test/io_test.rb +419 -0
- data/test/protocol_test.rb +195 -0
- data/test/sample_ipc_client.rb +85 -0
- data/test/sample_ipc_http_client.rb +84 -0
- data/test/sample_ipc_http_server.rb +79 -0
- data/test/sample_ipc_server.rb +92 -0
- data/test/schema_test.rb +135 -0
- data/test/socket_transport_test.rb +40 -0
- data/test/test_helper.rb +26 -0
- data/test/tool.rb +144 -0
- data/tros.gemspec +32 -0
- metadata +137 -0
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
2
|
+
# contributor license agreements. See the NOTICE file distributed with
|
3
|
+
# this work for additional information regarding copyright ownership.
|
4
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with
|
6
|
+
# the License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
source 'https://rubygems.org'
|
17
|
+
gemspec
|
data/Gemfile.lock
ADDED
data/README.md
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Tros [![Build Status](https://travis-ci.org/wvanbergen/tros.svg?branch=master)](https://travis-ci.org/wvanbergen/tros)
|
2
|
+
|
3
|
+
This is a cleanup fork of the Avro gem.
|
4
|
+
|
5
|
+
Reasons:
|
6
|
+
- Get rid of yajl/multi_json dependency
|
7
|
+
- Drop support for Ruby 1.8
|
8
|
+
- Add proper unicode support.
|
9
|
+
- Not being tied to the Apache Avro project's release schedule
|
10
|
+
- Public CI.
|
11
|
+
|
12
|
+
## Usage
|
13
|
+
|
14
|
+
For now, the API is the same as the Avro API. Just replace `Avro` with `Tros`.
|
15
|
+
|
16
|
+
## Tros?
|
17
|
+
|
18
|
+
The name Tros probably only makes sense to Dutch people.
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
require 'bundler/gem_tasks'
|
18
|
+
require 'rake/testtask'
|
19
|
+
|
20
|
+
Rake::TestTask.new('test') do |t|
|
21
|
+
t.libs << 'lib' << 'test'
|
22
|
+
t.test_files = FileList['test/*_test.rb']
|
23
|
+
end
|
24
|
+
|
25
|
+
task :default => :test
|
data/lib/tros.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
require 'json'
|
18
|
+
require 'set'
|
19
|
+
require 'digest/md5'
|
20
|
+
require 'net/http'
|
21
|
+
require 'stringio'
|
22
|
+
require 'zlib'
|
23
|
+
|
24
|
+
# Top-level namespace of the library; defines its error hierarchy.
module Tros
  # Base class of the errors defined by this library.
  class TrosError < StandardError
  end

  # Error whose default message has the form "Not a <schema>: <datum>".
  class TrosTypeError < Tros::TrosError
    # schm  - schema the datum was checked against (interpolated via #to_s).
    # datum - the offending value.
    # msg   - explicit message; when nil or false the default text is built
    #         from schm and datum.
    def initialize(schm=nil, datum=nil, msg=nil)
      super(msg || "Not a #{schm.to_s}: #{datum}")
    end
  end
end
|
34
|
+
|
35
|
+
require 'tros/schema'
|
36
|
+
require 'tros/io'
|
37
|
+
require 'tros/data_file'
|
38
|
+
require 'tros/protocol'
|
39
|
+
require 'tros/ipc'
|
@@ -0,0 +1,342 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
require 'openssl'
|
18
|
+
|
19
|
+
module Tros
|
20
|
+
module DataFile
|
21
|
+
VERSION = 1
|
22
|
+
MAGIC = "Obj" + [VERSION].pack('c')
|
23
|
+
MAGIC_SIZE = MAGIC.bytesize
|
24
|
+
SYNC_SIZE = 16
|
25
|
+
SYNC_INTERVAL = 4000 * SYNC_SIZE
|
26
|
+
META_SCHEMA = Schema.parse('{"type": "map", "values": "bytes"}')
|
27
|
+
VALID_ENCODINGS = ['binary'] # not used yet
|
28
|
+
|
29
|
+
class DataFileError < TrosError; end
|
30
|
+
|
31
|
+
# Opens the data file at +file_path+ for reading or writing.
#
# file_path - path of the file to open.
# mode      - 'r' (default) to read, 'w' to write; anything else raises.
# schema    - schema source handed to Tros::Schema.parse when given.
#             Required for mode 'w'; passed to the datum reader for mode 'r'.
# codec     - codec name, class or instance used when writing (optional).
#
# With a block, yields the reader/writer and closes it afterwards. The
# reader/writer is returned in both cases.
#
# Raises DataFileError for an unsupported mode or a missing write schema.
def self.open(file_path, mode='r', schema=nil, codec=nil)
  schema = Tros::Schema.parse(schema) if schema
  case mode
  when 'w'
    unless schema
      raise DataFileError, "Writing an Tros file requires a schema."
    end
    file = File.open(file_path, 'wb')
    io = open_writer(file, schema, codec)
  when 'r'
    file = File.open(file_path, 'rb')
    io = open_reader(file, schema)
  else
    raise DataFileError, "Only modes 'r' and 'w' allowed. You gave #{mode.inspect}."
  end

  yield io if block_given?
  io
ensure
  if io
    io.close if block_given?
  elsif file && !file.closed?
    # Building the reader/writer failed (e.g. bad header): nothing owns the
    # raw file handle yet, so release it here instead of leaking it.
    file.close
  end
end
|
50
|
+
|
51
|
+
# Returns the registry of codecs, keyed by codec name (String).
# The registry is created on first use, so this never returns nil.
def self.codecs
  @codecs ||= {}
end

# Registers +codec+ under its codec_name.
#
# codec - a codec instance, or a Class that is instantiated with no
#         arguments when the class itself does not respond to codec_name.
def self.register_codec(codec)
  codec = codec.new if !codec.respond_to?(:codec_name) && codec.is_a?(Class)
  codecs[codec.codec_name.to_s] = codec
end

# Resolves +codec+ to a codec object.
#
# codec - nil (treated as 'null'), an object responding to compress and
#         decompress, a codec Class, or a registered name (String/Symbol).
#
# Raises DataFileError when the name is not registered, including when no
# codec has been registered at all.
def self.get_codec(codec)
  codec ||= 'null'
  if codec.respond_to?(:compress) && codec.respond_to?(:decompress)
    codec # it's a codec instance
  elsif codec.is_a?(Class)
    codec.new # it's a codec class
  elsif codecs.key?(codec.to_s)
    codecs[codec.to_s] # it's a string or symbol (codec name)
  else
    raise DataFileError, "Unknown codec: #{codec.inspect}"
  end
end
|
73
|
+
|
74
|
+
class << self
  private

  # Builds a DataFile::Writer on +file+ using a fresh DatumWriter for +schema+.
  def open_writer(file, schema, codec=nil)
    datum_writer = Tros::IO::DatumWriter.new(schema)
    Tros::DataFile::Writer.new(file, datum_writer, schema, codec)
  end

  # Builds a DataFile::Reader on +file+; +schema+ is passed as the second
  # argument of the DatumReader (the first is left nil).
  def open_reader(file, schema)
    datum_reader = Tros::IO::DatumReader.new(nil, schema)
    Tros::DataFile::Reader.new(file, datum_reader)
  end
end
|
86
|
+
|
87
|
+
# Writes datums to a data file: a header (magic bytes, metadata map, sync
# marker) followed by blocks, each of which ends with the same sync marker.
class Writer
  # Returns SYNC_SIZE random bytes to be used as a file's sync marker.
  def self.generate_sync_marker
    OpenSSL::Random.random_bytes(SYNC_SIZE)
  end

  attr_reader :writer, :encoder, :datum_writer, :buffer_writer, :buffer_encoder, :sync_marker, :meta, :codec
  attr_accessor :block_count

  # writer         - IO-like object the file is written to; in append mode
  #                  it is also read (for the header) and seeked.
  # datum_writer   - object that serializes datums; its writers_schema is
  #                  assigned here.
  # writers_schema - schema for a new file. When nil the existing header of
  #                  +writer+ is read and new blocks are appended.
  # codec          - codec name, class or instance (new files only).
  def initialize(writer, datum_writer, writers_schema=nil, codec=nil)
    @writer = writer
    @encoder = IO::BinaryEncoder.new(@writer)
    @datum_writer = datum_writer
    @buffer_writer = StringIO.new('', 'w')
    @buffer_encoder = IO::BinaryEncoder.new(@buffer_writer)
    @block_count = 0

    @meta = {}

    if writers_schema
      @sync_marker = Writer.generate_sync_marker
      @codec = DataFile.get_codec(codec)
      meta['tros.codec'] = @codec.codec_name.to_s
      meta['tros.schema'] = writers_schema.to_s
      datum_writer.writers_schema = writers_schema
      write_header
    else
      # open writer for reading to collect metadata
      dfr = Reader.new(writer, Tros::IO::DatumReader.new)

      # FIXME(jmhodges): collect arbitrary metadata
      # collect metadata
      @sync_marker = dfr.sync_marker
      meta['tros.codec'] = dfr.meta['tros.codec']
      @codec = DataFile.get_codec(meta['tros.codec'])

      # get schema used to write existing file
      schema_from_file = dfr.meta['tros.schema']
      meta['tros.schema'] = schema_from_file
      datum_writer.writers_schema = Schema.parse(schema_from_file)

      # seek to the end of the file and prepare for writing
      writer.seek(0,2)
    end
  end

  # Append a datum to the file. The datum is buffered; the buffer is written
  # out as a block once it reaches SYNC_INTERVAL bytes.
  def <<(datum)
    datum_writer.write(datum, buffer_encoder)
    self.block_count += 1

    # if the data to write is larger than the sync interval, write
    # the block
    if buffer_writer.tell >= SYNC_INTERVAL
      write_block
    end
  end

  # Return the current position as a value that may be passed to
  # DataFileReader.seek(long). Forces the end of the current block,
  # emitting a synchronization marker.
  def sync
    write_block
    writer.tell
  end

  # Write any buffered datums as a block and flush the underlying writer.
  def flush
    write_block
    writer.flush
  end

  # Flush pending data and close the underlying writer. The writer is closed
  # even when flushing raises, so the handle is not leaked on errors.
  def close
    flush
  ensure
    writer.close
  end

  private

  # Writes the file header: magic bytes, metadata map, sync marker.
  def write_header
    # write magic
    writer.write(MAGIC)

    # write metadata
    datum_writer.write_data(META_SCHEMA, meta, encoder)

    # write sync marker
    writer.write(sync_marker)
  end

  # Writes the buffered datums as one block and resets the buffer.
  # Does nothing when no datum has been buffered.
  #
  # TODO(jmhodges): make a schema for blocks and use datum_writer
  # TODO(jmhodges): do we really need the number of items in the block?
  def write_block
    if block_count > 0
      # write number of items in block and block size in bytes
      encoder.write_long(block_count)
      to_write = codec.compress(buffer_writer.string)
      encoder.write_long(to_write.bytesize)

      # write block contents
      writer.write(to_write)

      # write sync marker
      writer.write(sync_marker)

      # reset buffer
      buffer_writer.truncate(0)
      buffer_writer.rewind
      self.block_count = 0
    end
  end
end
|
199
|
+
|
200
|
+
# Read files written by DataFileWriter
|
201
|
+
# Reads data files: parses the header on construction and then yields the
# datums stored in the file's blocks.
class Reader
  include ::Enumerable

  # The reader and binary decoder for the raw file stream
  attr_reader :reader, :decoder

  # The binary decoder for the contents of a block (after codec decompression)
  attr_reader :block_decoder

  attr_reader :datum_reader, :sync_marker, :meta, :file_length, :codec
  attr_accessor :block_count # records remaining in current block

  # reader       - seekable IO-like object positioned anywhere in the file.
  # datum_reader - object used to decode datums; its writers_schema is set
  #                from the 'tros.schema' entry of the file metadata.
  #
  # Raises DataFileError when the magic bytes are missing or wrong.
  def initialize(reader, datum_reader)
    @reader = reader
    @decoder = IO::BinaryDecoder.new(reader)
    @datum_reader = datum_reader

    # read the header: magic, meta, sync
    read_header

    @codec = DataFile.get_codec(meta['tros.codec'])

    # get ready to read
    @block_count = 0
    datum_reader.writers_schema = Schema.parse meta['tros.schema']
  end

  # Iterates through each datum in this file, yielding it to the block.
  # Returns an Enumerator when called without a block.
  # TODO(jmhodges): handle block of length zero
  def each
    return enum_for(:each) unless block_given?

    loop do
      if block_count == 0
        case
        when eof?; break
        when skip_sync
          break if eof?
          read_block_header
        else
          read_block_header
        end
      end

      datum = datum_reader.read(block_decoder)
      self.block_count -= 1
      yield(datum)
    end
  end

  # True when the underlying reader is at end of file.
  def eof?; reader.eof?; end

  # Closes the underlying reader.
  def close
    reader.close
  end

  private

  # Reads and validates the header, setting @meta and @sync_marker.
  # Raises DataFileError when the file is empty, shorter than the magic
  # block, or starts with different magic bytes.
  def read_header
    # seek to the beginning of the file to get magic block
    reader.seek(0, 0)

    # check magic number; read returns nil (not "") on an empty file
    magic_in_file = reader.read(MAGIC_SIZE)
    if magic_in_file.nil? || magic_in_file.bytesize < MAGIC_SIZE
      msg = 'Not an Tros data file: shorter than the Tros magic block'
      raise DataFileError, msg
    elsif magic_in_file != MAGIC
      msg = "Not an Tros data file: #{magic_in_file.inspect} doesn't match #{MAGIC.inspect}"
      raise DataFileError, msg
    end

    # read metadata
    @meta = datum_reader.read_data(META_SCHEMA,
                                   META_SCHEMA,
                                   decoder)
    # read sync marker
    @sync_marker = reader.read(SYNC_SIZE)
  end

  # Reads a block header (item count, byte size), decompresses the block
  # body and prepares block_decoder to read datums from it.
  def read_block_header
    self.block_count = decoder.read_long
    block_bytes = decoder.read_long
    data = codec.decompress(reader.read(block_bytes))
    @block_decoder = IO::BinaryDecoder.new(StringIO.new(data))
  end

  # read the length of the sync marker; if it matches the sync
  # marker, return true. Otherwise, seek back to where we started
  # and return false
  def skip_sync
    proposed_sync_marker = reader.read(SYNC_SIZE)
    return true if proposed_sync_marker == sync_marker

    # Rewind by what was actually consumed: near the end of the file the
    # read can return fewer than SYNC_SIZE bytes (or nil).
    consumed = proposed_sync_marker ? proposed_sync_marker.bytesize : 0
    reader.seek(-consumed, 1) if consumed > 0
    false
  end
end
|
298
|
+
|
299
|
+
|
300
|
+
# Pass-through codec named 'null': compress and decompress both return
# their argument unchanged.
class NullCodec
  # Name under which this codec is registered and recorded in file metadata.
  def codec_name
    'null'
  end

  # Returns +data+ as is.
  def decompress(data)
    data
  end

  # Returns +data+ as is.
  def compress(data)
    data
  end
end
|
305
|
+
|
306
|
+
# Codec named 'deflate' that stores block data as a raw deflate stream
# (no zlib header or checksum).
class DeflateCodec
  # Compression level passed to Zlib::Deflate.
  attr_reader :level

  # level - zlib compression level (defaults to Zlib::DEFAULT_COMPRESSION).
  def initialize(level=Zlib::DEFAULT_COMPRESSION)
    @level = level
  end

  # Name under which this codec is registered and recorded in file metadata.
  def codec_name; 'deflate'; end

  # Inflates +compressed+ and returns the original bytes.
  # Zlib errors (e.g. on truncated input) propagate to the caller.
  def decompress(compressed)
    # Passing a negative number to Inflate puts it into "raw" RFC1951 mode
    # (without the RFC1950 header & checksum). See the docs for
    # inflateInit2 in http://www.zlib.net/manual.html
    #
    # The stream is created outside the begin/ensure so that a failure to
    # create it is reported as-is instead of being masked by closing nil.
    zstream = Zlib::Inflate.new(-Zlib::MAX_WBITS)
    begin
      data = zstream.inflate(compressed)
      data << zstream.finish
    ensure
      zstream.close
    end
  end

  # Deflates +data+ at the configured level and returns the raw stream.
  # Raises Zlib::StreamError when the level is not accepted by zlib.
  def compress(data)
    zstream = Zlib::Deflate.new(level, -Zlib::MAX_WBITS)
    begin
      compressed = zstream.deflate(data)
      compressed << zstream.finish
    ensure
      zstream.close
    end
  end
end
|
334
|
+
|
335
|
+
DataFile.register_codec NullCodec
|
336
|
+
DataFile.register_codec DeflateCodec
|
337
|
+
|
338
|
+
# TODO this constant won't be updated if you register another codec.
|
339
|
+
# Deprecated in favor of Tros::DataFile::codecs
|
340
|
+
VALID_CODECS = DataFile.codecs.keys
|
341
|
+
end
|
342
|
+
end
|