avro 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +1 -0
- data/Manifest +19 -0
- data/Rakefile +59 -0
- data/avro.gemspec +34 -0
- data/interop/test_interop.rb +41 -0
- data/lib/avro.rb +39 -0
- data/lib/avro/collect_hash.rb +25 -0
- data/lib/avro/data_file.rb +243 -0
- data/lib/avro/io.rb +572 -0
- data/lib/avro/ipc.rb +443 -0
- data/lib/avro/protocol.rb +160 -0
- data/lib/avro/schema.rb +431 -0
- data/test/random_data.rb +90 -0
- data/test/sample_ipc_client.rb +86 -0
- data/test/sample_ipc_server.rb +91 -0
- data/test/test_help.rb +23 -0
- data/test/test_io.rb +361 -0
- data/test/test_protocol.rb +192 -0
- data/tmp/test.rb.avro +0 -0
- metadata +94 -0
data/CHANGELOG
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
v0.0.1 stuff
|
data/Manifest
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
CHANGELOG
|
2
|
+
Manifest
|
3
|
+
Rakefile
|
4
|
+
avro.gemspec
|
5
|
+
interop/test_interop.rb
|
6
|
+
lib/avro.rb
|
7
|
+
lib/avro/collect_hash.rb
|
8
|
+
lib/avro/data_file.rb
|
9
|
+
lib/avro/io.rb
|
10
|
+
lib/avro/ipc.rb
|
11
|
+
lib/avro/protocol.rb
|
12
|
+
lib/avro/schema.rb
|
13
|
+
test/random_data.rb
|
14
|
+
test/sample_ipc_client.rb
|
15
|
+
test/sample_ipc_server.rb
|
16
|
+
test/test_help.rb
|
17
|
+
test/test_io.rb
|
18
|
+
test/test_protocol.rb
|
19
|
+
tmp/test.rb.avro
|
data/Rakefile
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
require 'rubygems'
require 'echoe'
# Rake::TestTask is used below; require it explicitly rather than relying on
# another library having loaded it.
require 'rake/testtask'

# Locations of the shared Avro resources, resolved relative to this Rakefile.
# Defined first so everything below that refers to them reads top-down.
HERE = File.expand_path(File.dirname(__FILE__))
SHARE = HERE + '/../../share'
SCHEMAS = SHARE + '/test/schemas'
BUILD = HERE + '/../../build'

# File.read closes the handle it opens. strip removes any trailing newline so
# the value can be interpolated into the gem file name in the :dist task.
VERSION = File.read(SHARE + '/VERSION.txt').strip

# Gem packaging metadata (Echoe also provides the :manifest and :gem tasks
# that :dist depends on).
Echoe.new('avro', VERSION) do |p|
  p.author = "Apache Software Foundation"
  p.email = "avro-dev@hadoop.apache.org"
  p.summary = "Apache Avro for Ruby"
  p.description = "Apache Avro is a data serialization and RPC format"
  p.url = "http://hadoop.apache.org/avro/"
  p.runtime_dependencies = %w[yajl-ruby]
end

# rake interop: runs the interop tests under interop/.
Rake::TestTask.new(:interop) do |t|
  t.pattern = 'interop/test*.rb'
end

# rake generate_interop: writes two random datums conforming to the shared
# interop schema into the build tree's interop data directory.
task :generate_interop do
  $:.unshift(HERE + '/lib')
  $:.unshift(HERE + '/test')
  require 'avro'
  require 'random_data'

  schema = Avro::Schema.parse(File.read(SCHEMAS + '/interop.avsc'))
  r = RandomData.new(schema, ENV['SEED'])
  # 'wb': the data file is binary, so it must not go through text-mode
  # newline translation on platforms that perform it.
  f = File.open(BUILD + '/interop/data/ruby.avro', 'wb')
  writer = Avro::DataFile::Writer.new(f, Avro::IO::DatumWriter.new(schema), schema)
  begin
    writer << r.next
    writer << r.next
  ensure
    # Writer#close flushes the pending block and closes f.
    writer.close
  end
end

# rake dist: builds the gem and copies it into the shared dist directory.
task :dist => [:manifest, :gem] do
  mkdir_p "../../dist/ruby"
  cp "pkg/avro-#{VERSION}.gem", "../../dist/ruby"
end
|
data/avro.gemspec
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for avro 1.3.0.
Gem::Specification.new do |s|
  s.name = %q{avro}
  s.version = "1.3.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Apache Software Foundation"]
  s.date = %q{2010-03-01}
  # The project is "Apache Avro" (see the summary below), not "Apache".
  s.description = %q{Apache Avro is a data serialization and RPC format}
  s.email = %q{avro-dev@hadoop.apache.org}
  s.extra_rdoc_files = ["CHANGELOG", "lib/avro.rb", "lib/avro/collect_hash.rb", "lib/avro/data_file.rb", "lib/avro/io.rb", "lib/avro/ipc.rb", "lib/avro/protocol.rb", "lib/avro/schema.rb"]
  s.files = ["CHANGELOG", "Manifest", "Rakefile", "avro.gemspec", "interop/test_interop.rb", "lib/avro.rb", "lib/avro/collect_hash.rb", "lib/avro/data_file.rb", "lib/avro/io.rb", "lib/avro/ipc.rb", "lib/avro/protocol.rb", "lib/avro/schema.rb", "test/random_data.rb", "test/sample_ipc_client.rb", "test/sample_ipc_server.rb", "test/test_help.rb", "test/test_io.rb", "test/test_protocol.rb", "tmp/test.rb.avro"]
  s.homepage = %q{http://hadoop.apache.org/avro/}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Avro"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{avro}
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Apache Avro for Ruby}
  s.test_files = ["test/test_help.rb", "test/test_io.rb", "test/test_protocol.rb"]

  # yajl-ruby is the only dependency. It is declared as a runtime dependency
  # when the installed RubyGems supports that, and as a plain dependency
  # otherwise.
  if s.respond_to? :specification_version then
    s.specification_version = 3

    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<yajl-ruby>, [">= 0"])
    else
      s.add_dependency(%q<yajl-ruby>, [">= 0"])
    end
  else
    s.add_dependency(%q<yajl-ruby>, [">= 0"])
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
4
|
+
# distributed with this work for additional information
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
7
|
+
# "License"); you may not use this file except in compliance
|
8
|
+
# with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
# See the License for the specific language governing permissions and
|
16
|
+
# limitations under the License.
|
17
|
+
|
18
|
+
require 'rubygems'
|
19
|
+
require 'test/unit'
|
20
|
+
require 'avro'
|
21
|
+
|
22
|
+
# Defines one test per file in the build tree's interop data directory; each
# test reads the file back using the shared interop schema as the reader's
# schema.
class TestInterop < Test::Unit::TestCase
  HERE = File.expand_path(File.dirname(__FILE__))
  SHARE = HERE + '/../../../share'
  SCHEMAS = SHARE + '/test/schemas'

  Dir[HERE + '/../../../build/interop/data/*'].each do |fn|
    # Strip the whole '.avro' extension. Stripping only 'avro' left a trailing
    # dot in the generated method name (e.g. "test_read_ruby.").
    define_method("test_read_#{File.basename(fn, '.avro')}") do
      projection = Avro::Schema.parse(File.read(SCHEMAS+'/interop.avsc'))

      File.open(fn) do |f|
        r = Avro::DataFile::Reader.new(f, Avro::IO::DatumReader.new(projection))
        i = 0
        r.each do |datum|
          i += 1
          assert_not_nil datum, "nil datum from #{fn}"
        end
        # A file that yields no datums at all is treated as a failure.
        assert_not_equal 0, i, "no data read in from #{fn}"
      end
    end
  end
end
|
data/lib/avro.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
require 'yajl'
|
18
|
+
require 'set'
|
19
|
+
require 'md5'
|
20
|
+
|
21
|
+
module Avro
  # Placeholder version string exactly as shipped in this file.
  VERSION = "FIXME"

  # Base class for the errors raised by this library.
  class AvroError < StandardError; end

  # Raised when a datum does not match a schema.
  class AvroTypeError < Avro::AvroError
    # schm:  the schema the datum was checked against
    # datum: the offending value
    # msg:   optional message; when omitted, one is built from schm and datum
    def initialize(schm=nil, datum=nil, msg=nil)
      super(msg || "Not a #{schm}: #{datum}")
    end
  end
end
|
33
|
+
|
34
|
+
require 'avro/collect_hash'
|
35
|
+
require 'avro/schema'
|
36
|
+
require 'avro/io'
|
37
|
+
require 'avro/data_file'
|
38
|
+
require 'avro/protocol'
|
39
|
+
require 'avro/ipc'
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
module Enumerable
  # Builds a Hash from the receiver. The block is called with each element
  # and is expected to return a [key, value] pair; pairs whose key is nil or
  # false are left out. Later pairs overwrite earlier ones with the same key.
  def collect_hash
    inject({}) do |pairs, element|
      key, value = yield(element)
      next pairs unless key
      pairs[key] = value
      pairs
    end
  end
end
|
@@ -0,0 +1,243 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
require 'openssl'
|
18
|
+
|
19
|
+
module Avro
|
20
|
+
module DataFile
|
21
|
+
# Container format version; packed as the final byte of MAGIC.
VERSION = 1
# Signature checked at the very start of a data file by Reader.
MAGIC = "Obj" + [VERSION].pack('c')
MAGIC_SIZE = MAGIC.length
# Length in bytes of a sync marker.
SYNC_SIZE = 16
# Once the Writer's buffer holds at least this many bytes, the block is written.
SYNC_INTERVAL = SYNC_SIZE * 1000
# Schema used to encode and decode the header metadata map.
META_SCHEMA = Schema.parse('{"type": "map", "values": "bytes"}')
VALID_CODECS = %w[null]
VALID_ENCODINGS = %w[binary] # not used yet

# Raised for malformed data files and unsupported codecs.
class DataFileError < AvroError
end
|
31
|
+
|
32
|
+
# Writes a data file: a header (magic bytes, metadata map, sync marker)
# followed by blocks. Each block is the datum count, the byte length of the
# encoded datums, the encoded datums themselves, and the sync marker.
class Writer
  # Returns SYNC_SIZE random bytes for use as this file's sync marker.
  def self.generate_sync_marker
    OpenSSL::Random.random_bytes(SYNC_SIZE)
  end

  attr_reader :writer, :encoder, :datum_writer, :buffer_writer, :buffer_encoder, :sync_marker, :meta
  attr_accessor :block_count

  # writer:         IO-like object the file is written to
  # datum_writer:   object that encodes datums (and the header metadata)
  # writers_schema: schema for a new file; when omitted, `writer` is
  #                 presumed to hold an existing data file to append to
  def initialize(writer, datum_writer, writers_schema=nil)
    @writer = writer
    @encoder = IO::BinaryEncoder.new(@writer)
    @datum_writer = datum_writer
    # Datums are encoded into this in-memory buffer until a block is written.
    @buffer_writer = StringIO.new('', 'w')
    @buffer_encoder = IO::BinaryEncoder.new(@buffer_writer)
    @block_count = 0

    @meta = {}

    if writers_schema
      @sync_marker = Writer.generate_sync_marker
      meta['avro.codec'] = 'null'
      meta['avro.schema'] = writers_schema.to_s
      datum_writer.writers_schema = writers_schema
      write_header
    else
      # open writer for reading to collect metadata
      dfr = Reader.new(writer, Avro::IO::DatumReader.new)

      # FIXME(jmhodges): collect arbitrary metadata
      # collect metadata
      @sync_marker = dfr.sync_marker
      meta['avro.codec'] = dfr.meta['avro.codec']

      # get schema used to write existing file
      schema_from_file = dfr.meta['avro.schema']
      meta['avro.schema'] = schema_from_file
      datum_writer.writers_schema = Schema.parse(schema_from_file)

      # seek to the end of the file and prepare for writing
      writer.seek(0,2)
    end
  end

  # Append a datum to the file. The datum is buffered; the block is written
  # out once the buffer reaches SYNC_INTERVAL bytes.
  def <<(datum)
    datum_writer.write(datum, buffer_encoder)
    self.block_count += 1

    if buffer_writer.tell >= SYNC_INTERVAL
      write_block
    end
  end

  # Return the current position as a value that may be passed to
  # DataFileReader.seek(long). Forces the end of the current block,
  # emitting a synchronization marker.
  def sync
    write_block
    writer.tell
  end

  # Write out the pending block (if any) and flush the underlying writer.
  def flush
    write_block
    writer.flush
  end

  # Flush, then close the underlying writer.
  def close
    flush
    writer.close
  end

  private

  # Header layout: magic bytes, metadata map, sync marker.
  def write_header
    # write magic
    writer.write(MAGIC)

    # write metadata
    datum_writer.write_data(META_SCHEMA, meta, encoder)

    # write sync marker
    writer.write(sync_marker)
  end

  # Writes the buffered datums as one block and resets the buffer. Does
  # nothing when no datums are pending.
  #
  # Raises DataFileError when the file's codec is anything but 'null'.
  #
  # TODO(jmhodges): make a schema for blocks and use datum_writer
  # TODO(jmhodges): do we really need the number of items in the block?
  # TODO(jmhodges): use codec when writing the block contents
  def write_block
    return unless block_count > 0

    # Check the codec before emitting anything, so that an unsupported
    # codec cannot leave a block header without contents in the output.
    unless meta['avro.codec'] == 'null'
      msg = "#{meta['avro.codec'].inspect} codec is not supported"
      raise DataFileError, msg
    end

    to_write = buffer_writer.string

    # write number of items in block and block size in bytes; bytesize is
    # used where available because size counts characters, not bytes, for
    # strings in a multi-byte encoding
    encoder.write_long(block_count)
    encoder.write_long(to_write.respond_to?(:bytesize) ? to_write.bytesize : to_write.size)

    # write block contents
    writer.write(to_write)

    # write sync marker
    writer.write(sync_marker)

    # reset buffer; truncate alone leaves the write position where it was,
    # which would pad the next block with NUL bytes, so rewind as well
    buffer_writer.truncate(0)
    buffer_writer.rewind
    self.block_count = 0
  end
end
|
147
|
+
|
148
|
+
# Read files written by DataFileWriter
|
149
|
+
# Reads a data file: validates the header, then iterates over the datums
# stored in the blocks that follow it.
class Reader
  include ::Enumerable

  attr_reader :reader, :decoder, :datum_reader, :sync_marker, :meta, :file_length
  attr_accessor :block_count

  # reader:       seekable IO-like object positioned anywhere in the file
  # datum_reader: object that decodes datums (and the header metadata)
  #
  # Raises DataFileError when the header is missing, truncated or has the
  # wrong magic bytes, or when the file names a codec not in VALID_CODECS.
  def initialize(reader, datum_reader)
    @reader = reader
    @decoder = IO::BinaryDecoder.new(reader)
    @datum_reader = datum_reader

    # read the header: magic, meta, sync
    read_header

    # ensure the codec is valid
    codec_from_file = meta['avro.codec']
    if codec_from_file && !VALID_CODECS.include?(codec_from_file)
      raise DataFileError, "Unknown codec: #{codec_from_file}"
    end

    # get ready to read
    @block_count = 0
    datum_reader.writers_schema = Schema.parse(meta['avro.schema'])
  end

  # Iterates through each datum in this file, yielding them in order.
  def each
    loop do
      if block_count == 0
        break if eof?
        # Consume the sync marker that ends the previous block, if one is
        # there; stop when it was the last thing in the file.
        break if skip_sync && eof?
        read_block_header
        # A block that declares zero datums has nothing to decode; move on
        # to whatever follows it instead of reading a datum that isn't there.
        next if block_count == 0
      end

      datum = datum_reader.read(decoder)
      self.block_count -= 1
      yield(datum)
    end
  end

  def eof?; reader.eof?; end

  def close
    reader.close
  end

  private

  # Reads and validates the header, setting @meta and @sync_marker.
  def read_header
    # seek to the beginning of the file to get magic block
    reader.seek(0, 0)

    # check magic number; read returns nil when the input is empty
    magic_in_file = reader.read(MAGIC_SIZE)
    if magic_in_file.nil? || magic_in_file.size < MAGIC_SIZE
      msg = 'Not an Avro data file: shorter than the Avro magic block'
      raise DataFileError, msg
    elsif magic_in_file != MAGIC
      msg = "Not an Avro data file: #{magic_in_file.inspect} doesn't match #{MAGIC.inspect}"
      raise DataFileError, msg
    end

    # read metadata
    @meta = datum_reader.read_data(META_SCHEMA,
                                   META_SCHEMA,
                                   decoder)
    # read sync marker; a short or missing marker means the header was cut off
    @sync_marker = reader.read(SYNC_SIZE)
    if @sync_marker.nil? || @sync_marker.size < SYNC_SIZE
      msg = 'Not an Avro data file: header is truncated before the end of the sync marker'
      raise DataFileError, msg
    end
  end

  # Reads the datum count of the next block into block_count.
  def read_block_header
    self.block_count = decoder.read_long
    decoder.read_long # not doing anything with length in bytes
  end

  # Read up to SYNC_SIZE bytes; if they match the sync marker, return
  # true. Otherwise, seek back to where we started and return false.
  def skip_sync
    proposed_sync_marker = reader.read(SYNC_SIZE)
    return true if proposed_sync_marker == sync_marker

    # Near the end of the file fewer than SYNC_SIZE bytes may come back
    # (or none at all), so rewind by what was actually consumed.
    reader.seek(-proposed_sync_marker.size, 1) if proposed_sync_marker
    false
  end
end
|
242
|
+
end
|
243
|
+
end
|