avro-jruby 1.7.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1 @@
1
+ v0.0.1 stuff
@@ -0,0 +1,23 @@
1
+ CHANGELOG
2
+ Manifest
3
+ Rakefile
4
+ avro.gemspec
5
+ interop/test_interop.rb
6
+ lib/avro.rb
7
+ lib/avro/collect_hash.rb
8
+ lib/avro/data_file.rb
9
+ lib/avro/io.rb
10
+ lib/avro/ipc.rb
11
+ lib/avro/protocol.rb
12
+ lib/avro/schema.rb
13
+ test/random_data.rb
14
+ test/sample_ipc_client.rb
15
+ test/sample_ipc_http_client.rb
16
+ test/sample_ipc_http_server.rb
17
+ test/sample_ipc_server.rb
18
+ test/test_datafile.rb
19
+ test/test_help.rb
20
+ test/test_io.rb
21
+ test/test_protocol.rb
22
+ test/test_socket_transport.rb
23
+ test/tool.rb
@@ -0,0 +1,63 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'rubygems'
18
+ require 'echoe'
19
# Gem version string, read from the repository-wide VERSION.txt. A
# "-SNAPSHOT" suffix is rewritten to ".pre1" and the trailing newline is
# dropped. File.read closes the file as soon as the contents are read.
VERSION = File.read('../../share/VERSION.txt').sub('-SNAPSHOT', '.pre1').chomp

# Gem packaging configuration, declared through Echoe.
Echoe.new('avro', VERSION) do |p|
  p.author = "Apache Software Foundation"
  p.email = "avro-dev@hadoop.apache.org"
  p.summary = "Apache Avro for Ruby"
  p.description = "Avro is a data serialization and RPC format"
  p.url = "http://hadoop.apache.org/avro/"
  # The JSON dependency is named "multi_json" (underscore): that is the name
  # the gemspec declares and the name lib/avro.rb requires.
  p.runtime_dependencies = %w[multi_json]
end
28
+
29
# Rake task `interop`: runs the test files matching interop/test*.rb.
Rake::TestTask.new(:interop) do |interop|
  interop.pattern = 'interop/test*.rb'
end
31
+
32
# Rake task `generate_interop`: writes two Avro data files with random
# records for the interop schema -- ruby.avro (default codec, two records)
# and ruby_deflate.avro (deflate codec, twenty records).
task :generate_interop do
  $:.unshift(HERE + '/lib')
  $:.unshift(HERE + '/test')
  require 'avro'
  require 'random_data'

  schema = Avro::Schema.parse(File.read(SCHEMAS + '/interop.avsc'))
  r = RandomData.new(schema, ENV['SEED'])

  # Avro data files are binary, so open with 'wb' exactly as
  # Avro::DataFile.open does; text mode can translate bytes on some platforms.
  f = File.open(BUILD + '/interop/data/ruby.avro', 'wb')
  begin
    writer = Avro::DataFile::Writer.new(f, Avro::IO::DatumWriter.new(schema), schema)
    begin
      writer << r.next
      writer << r.next
    ensure
      writer.close
    end
  ensure
    # Covers the case where constructing the Writer itself raised.
    f.close unless f.closed?
  end

  Avro::DataFile.open(BUILD + '/interop/data/ruby_deflate.avro', 'w', schema.to_s, :deflate) do |deflate_writer|
    20.times { deflate_writer << r.next }
  end
end
53
+
54
+
55
# Absolute path of the directory that contains this file.
HERE = File.dirname(File.expand_path(__FILE__))
# Shared resources directory, two levels above HERE.
SHARE = "#{HERE}/../../share"
# Directory holding the test schemas (e.g. interop.avsc).
SCHEMAS = "#{SHARE}/test/schemas"
# Build output directory, two levels above HERE.
BUILD = "#{HERE}/../../build"
59
+
60
# Rake task `dist`: after the :gem prerequisite has run, copies the built
# gem from pkg/ into ../../dist/ruby (creating that directory if needed).
task :dist => [:gem] do
  dist_dir = "../../dist/ruby"
  mkdir_p dist_dir
  cp "pkg/avro-#{VERSION}.gem", dist_dir
end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # stub: avro 1.7.5 ruby lib
3
+
4
Gem::Specification.new do |s|
  # Library sources; listed once because they appear in both the rdoc file
  # list and the packaged file list.
  lib_files = %w[
    lib/avro.rb
    lib/avro/collect_hash.rb
    lib/avro/data_file.rb
    lib/avro/io.rb
    lib/avro/ipc.rb
    lib/avro/protocol.rb
    lib/avro/schema.rb
  ]

  s.name = "avro-jruby"
  s.version = "1.7.5"

  if s.respond_to?(:required_rubygems_version=)
    s.required_rubygems_version = Gem::Requirement.new(">= 1.2")
  end
  s.authors = ["Apache Software Foundation"]
  s.date = "2013-10-25"
  s.description = "Avro is a data serialization and RPC format"
  s.email = "avro-dev@hadoop.apache.org"
  s.extra_rdoc_files = ["CHANGELOG"] + lib_files
  s.files = %w[
    CHANGELOG
    Manifest
    Rakefile
    avro-jruby.gemspec
    interop/test_interop.rb
  ] + lib_files + %w[
    test/random_data.rb
    test/sample_ipc_client.rb
    test/sample_ipc_http_client.rb
    test/sample_ipc_http_server.rb
    test/sample_ipc_server.rb
    test/test_datafile.rb
    test/test_help.rb
    test/test_io.rb
    test/test_protocol.rb
    test/test_socket_transport.rb
    test/tool.rb
    test/test_schema.rb
  ]
  s.homepage = "https://github.com/aia/avro-gem-jruby"
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Avro"]
  s.require_paths = ["lib"]
  s.rubyforge_project = "avro-jruby"
  s.rubygems_version = "2.1.9"
  s.summary = "Apache Avro for Ruby"
  s.test_files = %w[
    test/test_datafile.rb
    test/test_help.rb
    test/test_io.rb
    test/test_protocol.rb
    test/test_schema.rb
    test/test_socket_transport.rb
  ]

  # multi_json is declared as a runtime dependency when the running RubyGems
  # understands specification versions and is at least 1.2.0; otherwise it is
  # declared through the plain add_dependency call.
  versioned_spec = s.respond_to?(:specification_version)
  s.specification_version = 4 if versioned_spec

  if versioned_spec && Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
    s.add_runtime_dependency(%q<multi_json>, [">= 0"])
  else
    s.add_dependency(%q<multi_json>, [">= 0"])
  end
end
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ require 'rubygems'
19
+ require 'test/unit'
20
+ require 'avro'
21
+
22
# Interop tests: every file found in build/interop/data gets its own
# generated test method, which reads the file back using the interop schema
# and checks that at least one non-nil datum comes out.
class TestInterop < Test::Unit::TestCase
  HERE = File.expand_path(File.dirname(__FILE__))
  SHARE = HERE + '/../../../share'
  SCHEMAS = SHARE + '/test/schemas'

  Dir[HERE + '/../../../build/interop/data/*'].each do |fn|
    # Strip the whole ".avro" extension, dot included, so the generated name
    # is e.g. test_read_ruby rather than "test_read_ruby.".
    define_method("test_read_#{File.basename(fn, '.avro')}") do
      projection = Avro::Schema.parse(File.read(SCHEMAS+'/interop.avsc'))

      # Binary mode, matching how Avro::DataFile.open reads data files.
      File.open(fn, 'rb') do |f|
        r = Avro::DataFile::Reader.new(f, Avro::IO::DatumReader.new(projection))
        i = 0
        r.each do |datum|
          i += 1
          assert_not_nil datum, "nil datum from #{fn}"
        end
        assert_not_equal 0, i, "no data read in from #{fn}"
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'multi_json'
18
+ require 'set'
19
+ require 'digest/md5'
20
+ require 'net/http'
21
+ require 'stringio'
22
+ require 'zlib'
23
+
24
# Top-level namespace of the Avro library.
module Avro
  VERSION = "FIXME"

  # Root of the library's error hierarchy.
  class AvroError < StandardError
  end

  # Error describing a datum that does not fit a schema. When no explicit
  # message is passed, the message is "Not a <schm>: <datum>".
  class AvroTypeError < Avro::AvroError
    def initialize(schm=nil, datum=nil, msg=nil)
      super(msg || "Not a #{schm.to_s}: #{datum}")
    end
  end
end
36
+
37
+ require 'avro/collect_hash'
38
+ require 'avro/schema'
39
+ require 'avro/io'
40
+ require 'avro/data_file'
41
+ require 'avro/protocol'
42
+ require 'avro/ipc'
@@ -0,0 +1,25 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
module Enumerable
  # Builds a Hash from the [key, value] pair the block returns for each
  # element. Elements whose key is nil or false are left out.
  def collect_hash
    each_with_object({}) do |item, result|
      key, value = yield(item)
      result[key] = value if key
    end
  end
end
@@ -0,0 +1,342 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'openssl'
18
+
19
+ module Avro
20
+ module DataFile
21
# Version number embedded as the last byte of the file magic.
VERSION = 1
# Leading bytes of every data file: "Obj" followed by the version byte.
MAGIC = "Obj" + [VERSION].pack('c')
MAGIC_SIZE = MAGIC.length
# Length in bytes of the sync marker.
SYNC_SIZE = 16
# Buffered byte count at which the writer emits a block.
SYNC_INTERVAL = SYNC_SIZE * 1000
# Schema of the file header metadata: a map of bytes values.
META_SCHEMA = Schema.parse('{"type": "map", "values": "bytes"}')
VALID_ENCODINGS = %w[binary] # not used yet

# Raised for data file problems (bad magic, bad mode, unknown codec, ...).
class DataFileError < AvroError
end
30
+
31
# Opens the Avro data file at +file_path+.
#
# file_path:: path of the file to open
# mode::      'r' to read (default) or 'w' to write
# schema::    schema source handed to Avro::Schema.parse; required for 'w'
# codec::     codec for writing (see get_codec); ignored when reading
#
# Returns a Reader ('r') or Writer ('w'). When a block is given, the
# reader/writer is yielded to it and closed afterwards, even if the block
# raises; the (closed) object is still the return value.
#
# Raises DataFileError for an unsupported mode or a missing schema in 'w'.
def self.open(file_path, mode='r', schema=nil, codec=nil)
  schema = Avro::Schema.parse(schema) if schema
  file = nil
  io = nil
  case mode
  when 'w'
    unless schema
      raise DataFileError, "Writing an Avro file requires a schema."
    end
    file = File.open(file_path, 'wb')
    io = open_writer(file, schema, codec)
  when 'r'
    file = File.open(file_path, 'rb')
    io = open_reader(file, schema)
  else
    raise DataFileError, "Only modes 'r' and 'w' allowed. You gave #{mode.inspect}."
  end

  yield io if block_given?
  io
ensure
  if io
    io.close if block_given?
  elsif file && !file.closed?
    # The file was opened but building the reader/writer failed: nobody else
    # holds the handle, so release it here instead of leaking it.
    file.close
  end
end
50
+
51
# Returns the Hash of registered codecs keyed by codec name (String), or nil
# when nothing has been registered yet.
def self.codecs
  @codecs
end

# Registers +codec+ under its codec_name. A Class that does not itself
# respond to codec_name is instantiated first. Returns the stored codec.
def self.register_codec(codec)
  @codecs ||= {}
  needs_instance = codec.is_a?(Class) && !codec.respond_to?(:codec_name)
  codec = codec.new if needs_instance
  @codecs[codec.codec_name.to_s] = codec
end

# Resolves +codec+ to a codec object. Accepts nil (treated as 'null'), a
# codec instance, a codec class, or a registered codec name given as a
# String or Symbol. Raises DataFileError for an unregistered name.
def self.get_codec(codec)
  codec ||= 'null'
  # it's a codec instance
  return codec if codec.respond_to?(:compress) && codec.respond_to?(:decompress)
  # it's a codec class
  return codec.new if codec.is_a?(Class)

  # it's a string or symbol (codec name)
  @codecs.fetch(codec.to_s) do
    raise DataFileError, "Unknown codec: #{codec.inspect}"
  end
end
73
+
74
class << self
  private

  # Builds a DataFile::Writer on +file+, backed by a new DatumWriter for
  # +schema+.
  def open_writer(file, schema, codec=nil)
    Avro::DataFile::Writer.new(file, Avro::IO::DatumWriter.new(schema), schema, codec)
  end

  # Builds a DataFile::Reader on +file+; +schema+ is passed as the second
  # argument of the new DatumReader (the first is left nil).
  def open_reader(file, schema)
    Avro::DataFile::Reader.new(file, Avro::IO::DatumReader.new(nil, schema))
  end
end
86
+
87
# Writes data to an Avro data file. Data are serialized into an in-memory
# buffer and emitted as a block (item count, byte size, codec-compressed
# payload, sync marker) once the buffer reaches SYNC_INTERVAL bytes, or when
# sync/flush/close is called.
class Writer
  # Returns SYNC_SIZE random bytes to use as a file's sync marker.
  def self.generate_sync_marker
    OpenSSL::Random.random_bytes(SYNC_SIZE)
  end

  attr_reader :writer, :encoder, :datum_writer, :buffer_writer, :buffer_encoder, :sync_marker, :meta, :codec
  attr_accessor :block_count

  # writer::         IO-like object the file is written to
  # datum_writer::   object used to serialize each datum
  # writers_schema:: schema for a new file; when nil, +writer+ is assumed to
  #                  hold an existing data file that is being appended to
  # codec::          codec for a new file (resolved through DataFile.get_codec)
  def initialize(writer, datum_writer, writers_schema=nil, codec=nil)
    # If writers_schema is not present, presume we're appending
    @writer = writer
    @encoder = IO::BinaryEncoder.new(@writer)
    @datum_writer = datum_writer
    @buffer_writer = StringIO.new('', 'w')
    @buffer_encoder = IO::BinaryEncoder.new(@buffer_writer)
    @block_count = 0

    @meta = {}

    if writers_schema
      @sync_marker = Writer.generate_sync_marker
      @codec = DataFile.get_codec(codec)
      meta['avro.codec'] = @codec.codec_name.to_s
      meta['avro.schema'] = writers_schema.to_s
      datum_writer.writers_schema = writers_schema
      write_header
    else
      # open writer for reading to collect metadata
      dfr = Reader.new(writer, Avro::IO::DatumReader.new)

      # FIXME(jmhodges): collect arbitrary metadata
      # collect metadata
      @sync_marker = dfr.sync_marker
      meta['avro.codec'] = dfr.meta['avro.codec']
      @codec = DataFile.get_codec(meta['avro.codec'])

      # get schema used to write existing file
      schema_from_file = dfr.meta['avro.schema']
      meta['avro.schema'] = schema_from_file
      datum_writer.writers_schema = Schema.parse(schema_from_file)

      # seek to the end of the file and prepare for writing
      writer.seek(0,2)
    end
  end

  # Append a datum to the file
  def <<(datum)
    datum_writer.write(datum, buffer_encoder)
    self.block_count += 1

    # if the data to write is larger than the sync interval, write
    # the block
    if buffer_writer.tell >= SYNC_INTERVAL
      write_block
    end
  end

  # Return the current position as a value that may be passed to
  # DataFileReader.seek(long). Forces the end of the current block,
  # emitting a synchronization marker.
  def sync
    write_block
    writer.tell
  end

  # Flush the current state of the file, including metadata
  def flush
    write_block
    writer.flush
  end

  # Flushes pending data and closes the underlying writer. The underlying
  # writer is closed even when the flush raises, so the handle is not leaked.
  def close
    flush
  ensure
    writer.close
  end

  private

  # Writes the file header: magic bytes, metadata map, sync marker.
  def write_header
    # write magic
    writer.write(MAGIC)

    # write metadata
    datum_writer.write_data(META_SCHEMA, meta, encoder)

    # write sync marker
    writer.write(sync_marker)
  end

  # Emits the buffered data as one block and resets the buffer. Does
  # nothing when no datum has been buffered.
  # TODO(jmhodges): make a schema for blocks and use datum_writer
  # TODO(jmhodges): do we really need the number of items in the block?
  def write_block
    if block_count > 0
      # write number of items in block and block size in bytes
      encoder.write_long(block_count)
      to_write = codec.compress(buffer_writer.string)
      encoder.write_long(to_write.size)

      # write block contents
      writer.write(to_write)

      # write sync marker
      writer.write(sync_marker)

      # reset buffer
      buffer_writer.truncate(0)
      buffer_writer.rewind
      self.block_count = 0
    end
  end
end
199
+
200
+ # Read files written by DataFileWriter
201
# Reads an Avro data file: validates the header on construction, then
# yields each datum block by block through #each (and therefore through
# every Enumerable method).
class Reader
  include ::Enumerable

  # The reader and binary decoder for the raw file stream
  attr_reader :reader, :decoder

  # The binary decoder for the contents of a block (after codec decompression)
  attr_reader :block_decoder

  attr_reader :datum_reader, :sync_marker, :meta, :file_length, :codec
  attr_accessor :block_count # records remaining in current block

  # reader::       seekable IO-like object positioned on a data file
  # datum_reader:: object used to deserialize each datum; its writers_schema
  #                is set from the schema stored in the file header
  #
  # Raises DataFileError when the stream does not start with the Avro magic.
  def initialize(reader, datum_reader)
    @reader = reader
    @decoder = IO::BinaryDecoder.new(reader)
    @datum_reader = datum_reader

    # read the header: magic, meta, sync
    read_header

    @codec = DataFile.get_codec(meta['avro.codec'])

    # get ready to read
    @block_count = 0
    datum_reader.writers_schema = Schema.parse meta['avro.schema']
  end

  # Iterates through each datum in this file. Returns an Enumerator when
  # called without a block. Blocks that declare zero items are skipped.
  def each
    return enum_for(:each) unless block_given?

    loop do
      if block_count == 0
        break if eof?
        # Consume the sync marker when one is next; the file may legitimately
        # end right after it.
        break if skip_sync && eof?
        read_block_header
        # An empty block holds nothing to decode; go on to the next block.
        next if block_count == 0
      end

      datum = datum_reader.read(block_decoder)
      self.block_count -= 1
      yield(datum)
    end
  end

  def eof?; reader.eof?; end

  def close
    reader.close
  end

  private

  # Reads and validates the magic bytes, then loads the metadata map and the
  # sync marker. Raises DataFileError when the magic is missing or wrong.
  def read_header
    # seek to the beginning of the file to get magic block
    reader.seek(0, 0)

    # check magic number
    magic_in_file = reader.read(MAGIC_SIZE)
    # read(n) returns nil at end of stream (e.g. an empty file), so check for
    # nil before measuring the result.
    if magic_in_file.nil? || magic_in_file.size < MAGIC_SIZE
      msg = 'Not an Avro data file: shorter than the Avro magic block'
      raise DataFileError, msg
    elsif magic_in_file != MAGIC
      msg = "Not an Avro data file: #{magic_in_file.inspect} doesn't match #{MAGIC.inspect}"
      raise DataFileError, msg
    end

    # read metadata
    @meta = datum_reader.read_data(META_SCHEMA,
                                   META_SCHEMA,
                                   decoder)
    # read sync marker
    @sync_marker = reader.read(SYNC_SIZE)
  end

  # Reads the item count and byte size of the next block, then decompresses
  # the block payload and points block_decoder at it.
  def read_block_header
    self.block_count = decoder.read_long
    block_bytes = decoder.read_long
    data = codec.decompress(reader.read(block_bytes))
    @block_decoder = IO::BinaryDecoder.new(StringIO.new(data))
  end

  # read the length of the sync marker; if it matches the sync
  # marker, return true. Otherwise, seek back to where we started
  # and return false
  def skip_sync
    proposed_sync_marker = reader.read(SYNC_SIZE)
    if proposed_sync_marker != sync_marker
      reader.seek(-SYNC_SIZE, 1)
      false
    else
      true
    end
  end
end
298
+
299
+
300
# Codec named 'null': compress and decompress both return their argument
# unchanged.
class NullCodec
  def codec_name
    'null'
  end

  def decompress(data)
    data
  end

  def compress(data)
    data
  end
end
305
+
306
# Codec named 'deflate': block data is compressed with zlib in raw mode
# (no zlib header or checksum).
class DeflateCodec
  # Compression level handed to Zlib::Deflate.
  attr_reader :level

  def initialize(level=Zlib::DEFAULT_COMPRESSION)
    @level = level
  end

  def codec_name; 'deflate'; end

  # Returns the inflated form of +compressed+.
  def decompress(compressed)
    # Passing a negative number to Inflate puts it into "raw" RFC1951 mode
    # (without the RFC1950 header & checksum). See the docs for
    # inflateInit2 in http://www.zlib.net/manual.html
    zstream = Zlib::Inflate.new(-Zlib::MAX_WBITS)
    # The cleanup only wraps the work done after the stream exists, so a
    # failure while creating the stream is reported as itself.
    begin
      data = zstream.inflate(compressed)
      data << zstream.finish
    ensure
      zstream.close
    end
  end

  # Returns +data+ deflated at the configured level. An invalid level
  # surfaces as the Zlib error raised when the stream is created.
  def compress(data)
    zstream = Zlib::Deflate.new(level, -Zlib::MAX_WBITS)
    begin
      compressed = zstream.deflate(data)
      compressed << zstream.finish
    ensure
      zstream.close
    end
  end
end
334
+
335
# Register the codecs defined in this file.
[NullCodec, DeflateCodec].each { |builtin| DataFile.register_codec(builtin) }

# TODO this constant won't be updated if you register another codec.
# Deprecated in favor of Avro::DataFile::codecs
VALID_CODECS = DataFile.codecs.keys
341
+ end
342
+ end