tros 1.7.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ .DS_Store
2
+ /tmp
3
+ /interop
4
+ data.avr
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+ script: bundle exec rake
3
+ rvm:
4
+ - 1.9.3
5
+ - 2.0.0
6
+ - 2.1.1
7
+ - ruby-head
8
+ - jruby-19mode
9
+ - rbx-2.2.6
10
+ matrix:
11
+ allow_failures:
12
+ - rvm: rbx-2.2.6
13
+ - rvm: ruby-head
data/Gemfile ADDED
@@ -0,0 +1,17 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ source 'https://rubygems.org'
17
+ gemspec
@@ -0,0 +1,18 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ tros (1.7.6.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ minitest (5.0.6)
10
+ rake (10.3.2)
11
+
12
+ PLATFORMS
13
+ ruby
14
+
15
+ DEPENDENCIES
16
+ minitest (~> 5)
17
+ rake
18
+ tros!
@@ -0,0 +1,18 @@
1
+ # Tros [![Build Status](https://travis-ci.org/wvanbergen/tros.svg?branch=master)](https://travis-ci.org/wvanbergen/tros)
2
+
3
+ This is a cleanup fork of the Avro gem.
4
+
5
+ Reasons:
6
+ - Get rid of yajl/multi_json dependency
7
+ - Drop support for Ruby 1.8
8
+ - Add proper Unicode support.
9
+ - Not being tied to the Apache Avro project's release schedule
10
+ - Public CI.
11
+
12
+ ## Usage
13
+
14
+ For now, the API is the same as the Avro API. Just replace `Avro` with `Tros`.
15
+
16
+ ## Tros?
17
+
18
+ The name Tros probably only makes sense to Dutch people.
@@ -0,0 +1,25 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'bundler/gem_tasks'
18
+ require 'rake/testtask'
19
+
20
# Defines the `test` task: runs every test/*_test.rb file with both lib/ and
# test/ added to the load path.
Rake::TestTask.new('test') do |test_task|
  test_task.libs.push('lib', 'test')
  test_task.test_files = FileList['test/*_test.rb']
end

# Running `rake` without arguments runs the test task.
task default: :test
@@ -0,0 +1,39 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'json'
18
+ require 'set'
19
+ require 'digest/md5'
20
+ require 'net/http'
21
+ require 'stringio'
22
+ require 'zlib'
23
+
24
module Tros
  # Base class of the errors defined by this library.
  class TrosError < StandardError; end

  # Error whose default message reports that +datum+ is not a +schm+.
  class TrosTypeError < Tros::TrosError
    # schm  - the schema (or anything responding to to_s) the datum was checked against.
    # datum - the offending value; interpolated into the default message.
    # msg   - optional message that replaces the default one entirely.
    def initialize(schm=nil, datum=nil, msg=nil)
      super(msg || "Not a #{schm.to_s}: #{datum}")
    end
  end
end
34
+
35
+ require 'tros/schema'
36
+ require 'tros/io'
37
+ require 'tros/data_file'
38
+ require 'tros/protocol'
39
+ require 'tros/ipc'
@@ -0,0 +1,342 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'openssl'
18
+
19
+ module Tros
20
+ module DataFile
21
# Version number of the file format; packed into the last byte of MAGIC.
VERSION = 1
# Leading bytes of every data file: "Obj" followed by VERSION packed as one char.
MAGIC = 'Obj' + [VERSION].pack('c')
# Number of bytes Reader#read_header reads when checking the magic.
MAGIC_SIZE = MAGIC.bytesize
# Length in bytes of the sync marker read after the header and after each block.
SYNC_SIZE = 16
# Writer#<< writes out the buffered block once the buffer reaches this many bytes.
SYNC_INTERVAL = SYNC_SIZE * 4000
# Schema used to encode and decode the header metadata (a map with bytes values).
META_SCHEMA = Schema.parse('{"type": "map", "values": "bytes"}')
VALID_ENCODINGS = %w(binary) # not used yet

# Raised by DataFile for invalid modes, missing schemas, unknown codecs and
# files that do not start with MAGIC.
class DataFileError < TrosError; end
30
+
31
# Opens the data file at +file_path+ for reading or writing.
#
# file_path - path of the file to open.
# mode      - 'r' (default) to read, 'w' to write; anything else raises.
# schema    - schema source passed to Tros::Schema.parse when given. Required
#             for 'w'; for 'r' it is handed to the datum reader.
# codec     - codec (name, class or instance) used when writing.
#
# With a block, yields the Reader/Writer and closes it afterwards, even when
# the block raises. Without a block the caller must close the returned object.
#
# Returns the Reader or Writer.
# Raises DataFileError for an unsupported mode or a write without a schema.
def self.open(file_path, mode='r', schema=nil, codec=nil)
  schema = Tros::Schema.parse(schema) if schema
  file = nil
  io = nil

  case mode
  when 'w'
    unless schema
      raise DataFileError, "Writing an Tros file requires a schema."
    end
    file = File.open(file_path, 'wb')
    io = open_writer(file, schema, codec)
  when 'r'
    file = File.open(file_path, 'rb')
    io = open_reader(file, schema)
  else
    raise DataFileError, "Only modes 'r' and 'w' allowed. You gave #{mode.inspect}."
  end

  yield io if block_given?
  io
ensure
  if io
    io.close if block_given?
  elsif file && !file.closed?
    # Building the Reader/Writer failed (e.g. bad magic, unknown codec): nobody
    # else holds the raw file handle, so release it here instead of leaking it.
    file.close
  end
end
50
+
51
# Returns the Hash of registered codecs, keyed by codec name (String).
# Always a Hash: it is empty (rather than nil) before the first registration.
def self.codecs
  @codecs ||= {}
end
54
+
55
# Adds +codec+ to the registry under its codec_name (converted to a String).
# A Class that does not itself respond to codec_name is instantiated first.
# Returns the registered codec object.
def self.register_codec(codec)
  @codecs ||= {}
  needs_instance = codec.is_a?(Class) && !codec.respond_to?(:codec_name)
  instance = needs_instance ? codec.new : codec
  @codecs[instance.codec_name.to_s] = instance
end
60
+
61
# Resolves +codec+ to a codec object.
#
# codec - nil (treated as 'null'), an object responding to both compress and
#         decompress (returned as-is), a Class (instantiated with no
#         arguments), or a registered codec name given as String or Symbol.
#
# Returns the codec object.
# Raises DataFileError when the name is not registered, including the case
# where no codec has been registered at all.
def self.get_codec(codec)
  codec ||= 'null'
  return codec if codec.respond_to?(:compress) && codec.respond_to?(:decompress)
  return codec.new if codec.is_a?(Class)

  # @codecs stays nil until the first register_codec call; treat that as empty
  # so an unknown name is reported as DataFileError instead of NoMethodError.
  registry = @codecs || {}
  registry.fetch(codec.to_s) do
    raise DataFileError, "Unknown codec: #{codec.inspect}"
  end
end
73
+
74
class << self
  private

  # Builds a Writer on +file+ whose datum writer uses +schema+.
  def open_writer(file, schema, codec=nil)
    Tros::DataFile::Writer.new(file, Tros::IO::DatumWriter.new(schema), schema, codec)
  end

  # Builds a Reader on +file+; +schema+ is passed as the second argument of
  # the datum reader, its first argument is left nil.
  def open_reader(file, schema)
    Tros::DataFile::Reader.new(file, Tros::IO::DatumReader.new(nil, schema))
  end
end
86
+
87
# Writes a data file: a header (magic, metadata map, sync marker) followed by
# blocks. Each block holds its datum count, its byte size, the codec-compressed
# encoded datums and finally the sync marker.
class Writer
  # Returns SYNC_SIZE random bytes to use as the sync marker of a new file.
  # Uses the shared constant so it always matches what Reader reads back.
  def self.generate_sync_marker
    OpenSSL::Random.random_bytes(SYNC_SIZE)
  end

  attr_reader :writer, :encoder, :datum_writer, :buffer_writer, :buffer_encoder, :sync_marker, :meta, :codec
  attr_accessor :block_count

  # writer         - stream the file is written to. When appending it is also
  #                  handed to Reader.new, so it must support reading/seeking.
  # datum_writer   - object used to encode each datum and the header metadata.
  # writers_schema - schema of a new file; when nil the existing file's header
  #                  is read and writing continues at its end.
  # codec          - codec for a new file, resolved through DataFile.get_codec.
  def initialize(writer, datum_writer, writers_schema=nil, codec=nil)
    @writer = writer
    @encoder = IO::BinaryEncoder.new(@writer)
    @datum_writer = datum_writer
    # Datums are encoded into this in-memory buffer until a block is written.
    @buffer_writer = StringIO.new('', 'w')
    @buffer_encoder = IO::BinaryEncoder.new(@buffer_writer)
    @block_count = 0
    @meta = {}

    if writers_schema
      # New file: pick a fresh sync marker and write the header right away.
      @sync_marker = Writer.generate_sync_marker
      @codec = DataFile.get_codec(codec)
      meta['tros.codec'] = @codec.codec_name.to_s
      meta['tros.schema'] = writers_schema.to_s
      datum_writer.writers_schema = writers_schema
      write_header
    else
      # Appending: read the existing header to reuse its sync marker, codec
      # and schema.
      existing = Reader.new(writer, Tros::IO::DatumReader.new)

      # FIXME(jmhodges): collect arbitrary metadata
      @sync_marker = existing.sync_marker
      meta['tros.codec'] = existing.meta['tros.codec']
      @codec = DataFile.get_codec(meta['tros.codec'])

      schema_from_file = existing.meta['tros.schema']
      meta['tros.schema'] = schema_from_file
      datum_writer.writers_schema = Schema.parse(schema_from_file)

      # Position at the end of the stream (whence 2) so new blocks are appended.
      writer.seek(0, 2)
    end
  end

  # Appends a datum: it is encoded into the buffer, and the buffer is written
  # out as a block once it holds at least SYNC_INTERVAL bytes.
  def <<(datum)
    datum_writer.write(datum, buffer_encoder)
    self.block_count += 1

    write_block if buffer_writer.tell >= SYNC_INTERVAL
  end

  # Forces the end of the current block (emitting a sync marker when the block
  # is non-empty) and returns the current position of the underlying stream.
  def sync
    write_block
    writer.tell
  end

  # Writes any buffered datums as a block and flushes the underlying stream.
  def flush
    write_block
    writer.flush
  end

  # Flushes pending data, then closes the underlying stream.
  def close
    flush
    writer.close
  end

  private

  # Writes the file header: magic, metadata map, sync marker.
  def write_header
    writer.write(MAGIC)
    datum_writer.write_data(META_SCHEMA, meta, encoder)
    writer.write(sync_marker)
  end

  # Writes the buffered datums as one block and resets the buffer. Does nothing
  # when no datum has been appended since the last block.
  #
  # TODO(jmhodges): make a schema for blocks and use datum_writer
  # TODO(jmhodges): do we really need the number of items in the block?
  def write_block
    return unless block_count > 0

    # Block header: number of datums, then size in bytes of the (compressed) payload.
    encoder.write_long(block_count)
    to_write = codec.compress(buffer_writer.string)
    encoder.write_long(to_write.bytesize)

    writer.write(to_write)
    writer.write(sync_marker)

    # Empty the buffer for the next block.
    buffer_writer.truncate(0)
    buffer_writer.rewind
    self.block_count = 0
  end
end
199
+
200
# Reads files written by DataFile::Writer. The header is read on construction;
# #each then yields the datums block by block.
class Reader
  include ::Enumerable

  # The reader and binary decoder for the raw file stream
  attr_reader :reader, :decoder

  # The binary decoder for the contents of a block (after codec decompression)
  attr_reader :block_decoder

  attr_reader :datum_reader, :sync_marker, :meta, :file_length, :codec
  attr_accessor :block_count # records remaining in current block

  # reader       - stream to read from; must support seek, read, eof? and close.
  # datum_reader - object used to decode the header metadata and each datum;
  #                its writers_schema is set from the file's 'tros.schema' entry.
  #
  # Raises DataFileError when the stream does not start with MAGIC.
  def initialize(reader, datum_reader)
    @reader = reader
    @decoder = IO::BinaryDecoder.new(reader)
    @datum_reader = datum_reader

    # read the header: magic, meta, sync
    read_header

    @codec = DataFile.get_codec(meta['tros.codec'])

    # get ready to read
    @block_count = 0
    datum_reader.writers_schema = Schema.parse meta['tros.schema']
  end

  # Yields each datum in the file, in order. Returns an Enumerator when called
  # without a block.
  def each
    return enum_for(:each) unless block_given?

    loop do
      if block_count == 0
        break if eof?
        # A matching sync marker is consumed; if it was the last thing in the
        # file there is no further block to read.
        break if skip_sync && eof?
        read_block_header
        # A block that declares zero datums has nothing to decode: move on to
        # whatever follows it.
        next if block_count == 0
      end

      datum = datum_reader.read(block_decoder)
      self.block_count -= 1
      yield(datum)
    end
  end

  # True when the underlying stream is at end of file.
  def eof?
    reader.eof?
  end

  # Closes the underlying stream.
  def close
    reader.close
  end

  private

  # Reads and validates the magic, then reads the metadata map and the sync
  # marker. Raises DataFileError when the magic is missing, short or wrong.
  def read_header
    # seek to the beginning of the file to get magic block
    reader.seek(0, 0)

    # read(n) returns nil at end of file, so an empty stream must be treated
    # like a too-short one instead of calling a method on nil.
    magic_in_file = reader.read(MAGIC_SIZE)
    if magic_in_file.nil? || magic_in_file.bytesize < MAGIC_SIZE
      msg = 'Not an Tros data file: shorter than the Tros magic block'
      raise DataFileError, msg
    elsif magic_in_file != MAGIC
      msg = "Not an Tros data file: #{magic_in_file.inspect} doesn't match #{MAGIC.inspect}"
      raise DataFileError, msg
    end

    # read metadata
    @meta = datum_reader.read_data(META_SCHEMA, META_SCHEMA, decoder)

    # read sync marker
    @sync_marker = reader.read(SYNC_SIZE)
  end

  # Reads a block's datum count and byte size, then loads and decompresses the
  # block so that datums can be decoded from block_decoder.
  def read_block_header
    self.block_count = decoder.read_long
    block_bytes = decoder.read_long
    data = codec.decompress(reader.read(block_bytes))
    @block_decoder = IO::BinaryDecoder.new(StringIO.new(data))
  end

  # Reads SYNC_SIZE bytes; returns true when they equal the sync marker.
  # Otherwise seeks back to where the read started and returns false.
  def skip_sync
    proposed_sync_marker = reader.read(SYNC_SIZE)
    return true if proposed_sync_marker == sync_marker

    # Rewind by the number of bytes actually consumed; a short read near the
    # end of the stream returns fewer than SYNC_SIZE bytes (or nil).
    reader.seek(-proposed_sync_marker.bytesize, 1) if proposed_sync_marker
    false
  end
end
298
+
299
+
300
# Pass-through codec: block data is stored exactly as it was encoded.
class NullCodec
  # Name under which this codec is registered and recorded in file metadata.
  def codec_name
    'null'
  end

  # Returns +data+ unchanged.
  def compress(data)
    data
  end

  # Returns +data+ unchanged.
  def decompress(data)
    data
  end
end
305
+
306
# Codec that stores blocks as raw deflate streams (RFC 1951, i.e. without the
# RFC 1950 header and checksum).
class DeflateCodec
  attr_reader :level

  # level - zlib compression level; defaults to Zlib::DEFAULT_COMPRESSION.
  def initialize(level=Zlib::DEFAULT_COMPRESSION)
    @level = level
  end

  # Name under which this codec is registered and recorded in file metadata.
  def codec_name
    'deflate'
  end

  # Inflates a raw deflate stream and returns the uncompressed data.
  def decompress(compressed)
    # Passing a negative number to Inflate puts it into "raw" RFC1951 mode
    # (without the RFC1950 header & checksum). See the docs for
    # inflateInit2 in http://www.zlib.net/manual.html
    zstream = Zlib::Inflate.new(-Zlib::MAX_WBITS)
    # The stream is created outside the begin block so that a failing
    # constructor surfaces its own error instead of a call on nil in ensure.
    begin
      zstream.inflate(compressed) << zstream.finish
    ensure
      zstream.close
    end
  end

  # Deflates +data+ at the configured level and returns the raw deflate stream.
  # Raises the Zlib error reported by the stream (e.g. for an invalid level).
  def compress(data)
    zstream = Zlib::Deflate.new(level, -Zlib::MAX_WBITS)
    begin
      zstream.deflate(data) << zstream.finish
    ensure
      zstream.close
    end
  end
end
334
+
335
# Register the codecs defined above.
[NullCodec, DeflateCodec].each { |codec_class| DataFile.register_codec(codec_class) }

# TODO this constant won't be updated if you register another codec.
# Deprecated in favor of Tros::DataFile::codecs
VALID_CODECS = DataFile.codecs.keys
341
+ end
342
+ end