avro-jruby 1.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ v0.0.1 stuff
@@ -0,0 +1,23 @@
1
+ CHANGELOG
2
+ Manifest
3
+ Rakefile
4
+ avro.gemspec
5
+ interop/test_interop.rb
6
+ lib/avro.rb
7
+ lib/avro/collect_hash.rb
8
+ lib/avro/data_file.rb
9
+ lib/avro/io.rb
10
+ lib/avro/ipc.rb
11
+ lib/avro/protocol.rb
12
+ lib/avro/schema.rb
13
+ test/random_data.rb
14
+ test/sample_ipc_client.rb
15
+ test/sample_ipc_http_client.rb
16
+ test/sample_ipc_http_server.rb
17
+ test/sample_ipc_server.rb
18
+ test/test_datafile.rb
19
+ test/test_help.rb
20
+ test/test_io.rb
21
+ test/test_protocol.rb
22
+ test/test_socket_transport.rb
23
+ test/tool.rb
@@ -0,0 +1,63 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'rubygems'
18
+ require 'echoe'
19
# Gem version, taken from the shared VERSION.txt two directories up.
# A "-SNAPSHOT" suffix is rewritten to ".pre1". File.read is used so the
# file handle is closed as soon as the content has been read.
VERSION = File.read('../../share/VERSION.txt').sub('-SNAPSHOT', '.pre1').chomp

# Locations used by the interop tasks below, all relative to this Rakefile.
HERE = File.expand_path(File.dirname(__FILE__))
SHARE = HERE + '/../../share'
SCHEMAS = SHARE + '/test/schemas'
BUILD = HERE + '/../../build'

Echoe.new('avro', VERSION) do |p|
  p.author = "Apache Software Foundation"
  p.email = "avro-dev@hadoop.apache.org"
  p.summary = "Apache Avro for Ruby"
  p.description = "Avro is a data serialization and RPC format"
  p.url = "http://hadoop.apache.org/avro/"
  # The library requires 'multi_json' and the gemspec depends on
  # "multi_json", so the dependency is spelled with an underscore here too.
  p.runtime_dependencies = %w[multi_json]
end

# Runs the interop tests found under interop/.
t = Rake::TestTask.new(:interop)
t.pattern = 'interop/test*.rb'

# Writes two interop data files into the build directory: ruby.avro with the
# default codec and ruby_deflate.avro with the deflate codec. Random data is
# seeded from ENV['SEED'].
task :generate_interop do
  $:.unshift(HERE + '/lib')
  $:.unshift(HERE + '/test')
  require 'avro'
  require 'random_data'

  schema = Avro::Schema.parse(File.read(SCHEMAS + '/interop.avsc'))
  r = RandomData.new(schema, ENV['SEED'])

  # Binary mode matches what Avro::DataFile.open uses for writing. The block
  # form closes the file even when the writer cannot be constructed.
  File.open(BUILD + '/interop/data/ruby.avro', 'wb') do |f|
    writer = Avro::DataFile::Writer.new(f, Avro::IO::DatumWriter.new(schema), schema)
    begin
      writer << r.next
      writer << r.next
    ensure
      writer.close
    end
  end

  Avro::DataFile.open(BUILD + '/interop/data/ruby_deflate.avro', 'w', schema.to_s, :deflate) do |writer|
    20.times { writer << r.next }
  end
end

# Copies the built gem into the shared dist directory.
task :dist => [:gem] do
  mkdir_p "../../dist/ruby"
  cp "pkg/avro-#{VERSION}.gem", "../../dist/ruby"
end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # stub: avro 1.7.5 ruby lib
3
+
4
# Gem specification for avro-jruby 1.7.5. The only runtime dependency is
# multi_json (any version).
Gem::Specification.new do |spec|
  spec.name = "avro-jruby"
  spec.version = "1.7.5"

  if spec.respond_to?(:required_rubygems_version=)
    spec.required_rubygems_version = Gem::Requirement.new(">= 1.2")
  end
  spec.authors = ["Apache Software Foundation"]
  spec.date = "2013-10-25"
  spec.description = "Avro is a data serialization and RPC format"
  spec.email = "avro-dev@hadoop.apache.org"
  spec.extra_rdoc_files = %w[
    CHANGELOG
    lib/avro.rb
    lib/avro/collect_hash.rb
    lib/avro/data_file.rb
    lib/avro/io.rb
    lib/avro/ipc.rb
    lib/avro/protocol.rb
    lib/avro/schema.rb
  ]
  spec.files = %w[
    CHANGELOG
    Manifest
    Rakefile
    avro-jruby.gemspec
    interop/test_interop.rb
    lib/avro.rb
    lib/avro/collect_hash.rb
    lib/avro/data_file.rb
    lib/avro/io.rb
    lib/avro/ipc.rb
    lib/avro/protocol.rb
    lib/avro/schema.rb
    test/random_data.rb
    test/sample_ipc_client.rb
    test/sample_ipc_http_client.rb
    test/sample_ipc_http_server.rb
    test/sample_ipc_server.rb
    test/test_datafile.rb
    test/test_help.rb
    test/test_io.rb
    test/test_protocol.rb
    test/test_socket_transport.rb
    test/tool.rb
    test/test_schema.rb
  ]
  spec.homepage = "https://github.com/aia/avro-gem-jruby"
  spec.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Avro"]
  spec.require_paths = ["lib"]
  spec.rubyforge_project = "avro-jruby"
  spec.rubygems_version = "2.1.9"
  spec.summary = "Apache Avro for Ruby"
  spec.test_files = %w[
    test/test_datafile.rb
    test/test_help.rb
    test/test_io.rb
    test/test_protocol.rb
    test/test_schema.rb
    test/test_socket_transport.rb
  ]

  # add_runtime_dependency is only used when the spec exposes
  # specification_version and RubyGems is at least 1.2.0; every other
  # combination falls back to add_dependency.
  versioned_spec = spec.respond_to?(:specification_version)
  spec.specification_version = 4 if versioned_spec

  if versioned_spec && Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
    spec.add_runtime_dependency('multi_json', [">= 0"])
  else
    spec.add_dependency('multi_json', [">= 0"])
  end
end
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ require 'rubygems'
19
+ require 'test/unit'
20
+ require 'avro'
21
+
22
# Defines one test per file found in build/interop/data. Each test reads the
# file through the interop schema and checks that at least one datum was
# read and that no datum is nil.
class TestInterop < Test::Unit::TestCase
  HERE = File.expand_path(File.dirname(__FILE__))
  SHARE = HERE + '/../../../share'
  SCHEMAS = SHARE + '/test/schemas'

  Dir[HERE + '/../../../build/interop/data/*'].each do |fn|
    # Strip the full ".avro" extension so the generated name is e.g.
    # "test_read_ruby" rather than "test_read_ruby." (trailing dot).
    define_method("test_read_#{File.basename(fn, '.avro')}") do
      projection = Avro::Schema.parse(File.read(SCHEMAS + '/interop.avsc'))

      # Binary mode, matching how Avro::DataFile.open opens files for reading.
      File.open(fn, 'rb') do |f|
        r = Avro::DataFile::Reader.new(f, Avro::IO::DatumReader.new(projection))
        i = 0
        r.each do |datum|
          i += 1
          assert_not_nil datum, "nil datum from #{fn}"
        end
        assert_not_equal 0, i, "no data read in from #{fn}"
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'multi_json'
18
+ require 'set'
19
+ require 'digest/md5'
20
+ require 'net/http'
21
+ require 'stringio'
22
+ require 'zlib'
23
+
24
module Avro
  VERSION = "FIXME"

  # Base class for the errors defined by this library.
  class AvroError < StandardError
  end

  # Error carrying a "Not a <schema>: <datum>" message unless an explicit
  # message is supplied.
  class AvroTypeError < Avro::AvroError
    # schm  - value interpolated as the schema part of the default message
    # datum - value interpolated as the datum part of the default message
    # msg   - explicit message; when given it replaces the default text
    def initialize(schm=nil, datum=nil, msg=nil)
      super(msg || "Not a #{schm}: #{datum}")
    end
  end
end
36
+
37
+ require 'avro/collect_hash'
38
+ require 'avro/schema'
39
+ require 'avro/io'
40
+ require 'avro/data_file'
41
+ require 'avro/protocol'
42
+ require 'avro/ipc'
@@ -0,0 +1,25 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
module Enumerable
  # Builds a Hash by yielding every element to the block, which returns a
  # key (or a [key, value] pair). Pairs whose key is nil or false are
  # dropped; a later pair with the same key overwrites an earlier one.
  def collect_hash
    each_with_object({}) do |element, result|
      key, value = yield(element)
      result[key] = value if key
    end
  end
end
@@ -0,0 +1,342 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'openssl'
18
+
19
+ module Avro
20
+ module DataFile
21
# Container format version; it becomes the last byte of MAGIC.
VERSION = 1
# Marker written at the very start of every data file.
MAGIC = "Obj" + [VERSION].pack('c')
MAGIC_SIZE = MAGIC.length
# Length of a sync marker.
SYNC_SIZE = 16
# Buffered size at which the writer emits a block.
SYNC_INTERVAL = SYNC_SIZE * 1000
# Schema used to encode and decode the header metadata map.
META_SCHEMA = Schema.parse('{"type": "map", "values": "bytes"}')
VALID_ENCODINGS = %w[binary] # not used yet

# Raised for data-file level problems (bad magic, bad mode, unknown codec).
class DataFileError < AvroError
end
30
+
31
# Opens the Avro data file at +file_path+.
#
# file_path - path of the file to open
# mode      - 'r' to read (default) or 'w' to write
# schema    - schema source passed to Avro::Schema.parse; required for 'w'
# codec     - codec handed to the writer (only used for 'w')
#
# Without a block the opened Reader/Writer is returned and the caller must
# close it. With a block it is yielded and closed when the block finishes.
#
# Raises DataFileError for an unsupported mode or when writing without a
# schema.
def self.open(file_path, mode='r', schema=nil, codec=nil)
  schema = Avro::Schema.parse(schema) if schema
  file = nil
  io = nil

  case mode
  when 'w'
    unless schema
      raise DataFileError, "Writing an Avro file requires a schema."
    end
    file = File.open(file_path, 'wb')
    io = open_writer(file, schema, codec)
  when 'r'
    file = File.open(file_path, 'rb')
    io = open_reader(file, schema)
  else
    raise DataFileError, "Only modes 'r' and 'w' allowed. You gave #{mode.inspect}."
  end

  yield io if block_given?
  io
ensure
  if io
    io.close if block_given?
  elsif file && !file.closed?
    # The file was opened but wrapping it in a Reader/Writer failed, so
    # nothing else owns the handle; close it instead of leaking it.
    file.close
  end
end
50
+
51
# Returns the registry of codecs keyed by codec name, or nil when nothing
# has been registered yet.
def self.codecs
  @codecs
end

# Adds +codec+ to the registry under its codec_name (converted to a String)
# and returns the stored codec. A class that does not itself respond to
# codec_name is instantiated first.
def self.register_codec(codec)
  @codecs ||= {}
  if codec.is_a?(Class) && !codec.respond_to?(:codec_name)
    codec = codec.new
  end
  @codecs[codec.codec_name.to_s] = codec
end
60
+
61
# Resolves +codec+ to a codec object.
#
# codec - nil (treated as 'null'), an object responding to compress and
#         decompress (returned as is), a class (instantiated with no
#         arguments), or a registered codec name given as String or Symbol
#
# Raises DataFileError when the name is not registered, including when no
# codec has been registered at all.
def self.get_codec(codec)
  codec ||= 'null'
  return codec if codec.respond_to?(:compress) && codec.respond_to?(:decompress)
  return codec.new if codec.is_a?(Class)

  # The registry is nil until the first register_codec call; treat that the
  # same as an empty registry so the caller gets a DataFileError.
  registry = @codecs || {}
  name = codec.to_s
  unless registry.key?(name)
    raise DataFileError, "Unknown codec: #{codec.inspect}"
  end
  registry[name]
end
73
+
74
class << self
  private

  # Builds a DataFile::Writer on +file+ with a fresh DatumWriter for
  # +schema+ and the requested +codec+.
  def open_writer(file, schema, codec=nil)
    Avro::DataFile::Writer.new(file, Avro::IO::DatumWriter.new(schema), schema, codec)
  end

  # Builds a DataFile::Reader on +file+. +schema+ is passed as the second
  # argument of DatumReader.new; the first argument is left nil.
  def open_reader(file, schema)
    Avro::DataFile::Reader.new(file, Avro::IO::DatumReader.new(nil, schema))
  end
end
86
+
87
# Writes data to a stream in blocks: datums are buffered in memory and
# emitted as a block (item count, byte length, codec-compressed payload,
# sync marker) once the buffer reaches SYNC_INTERVAL or on sync/flush/close.
class Writer
  # Returns SYNC_SIZE random bytes to be used as a file's sync marker.
  def self.generate_sync_marker
    OpenSSL::Random.random_bytes(SYNC_SIZE)
  end

  attr_reader :writer, :encoder, :datum_writer, :buffer_writer, :buffer_encoder, :sync_marker, :meta, :codec
  attr_accessor :block_count

  # writer         - IO-like object the file is written to
  # datum_writer   - object that serializes individual datums
  # writers_schema - schema for a new file; when nil the existing file in
  #                  +writer+ is read for its metadata and appended to
  # codec          - codec (see DataFile.get_codec) for a new file
  def initialize(writer, datum_writer, writers_schema=nil, codec=nil)
    # If writers_schema is not present, presume we're appending
    @writer = writer
    @encoder = IO::BinaryEncoder.new(@writer)
    @datum_writer = datum_writer
    # The block buffer holds raw bytes. Keeping it binary stops the
    # underlying string from being treated as multi-byte text.
    @buffer_writer = StringIO.new(String.new, 'w')
    @buffer_writer.set_encoding('BINARY') if @buffer_writer.respond_to?(:set_encoding)
    @buffer_encoder = IO::BinaryEncoder.new(@buffer_writer)
    @block_count = 0

    @meta = {}

    if writers_schema
      @sync_marker = Writer.generate_sync_marker
      @codec = DataFile.get_codec(codec)
      meta['avro.codec'] = @codec.codec_name.to_s
      meta['avro.schema'] = writers_schema.to_s
      datum_writer.writers_schema = writers_schema
      write_header
    else
      # open writer for reading to collect metadata
      dfr = Reader.new(writer, Avro::IO::DatumReader.new)

      # FIXME(jmhodges): collect arbitrary metadata
      # collect metadata
      @sync_marker = dfr.sync_marker
      meta['avro.codec'] = dfr.meta['avro.codec']
      @codec = DataFile.get_codec(meta['avro.codec'])

      # get schema used to write existing file
      schema_from_file = dfr.meta['avro.schema']
      meta['avro.schema'] = schema_from_file
      datum_writer.writers_schema = Schema.parse(schema_from_file)

      # seek to the end of the file and prepare for writing
      writer.seek(0, 2)
    end
  end

  # Append a datum to the file
  def <<(datum)
    datum_writer.write(datum, buffer_encoder)
    self.block_count += 1

    # if the data to write is larger than the sync interval, write
    # the block
    if buffer_writer.tell >= SYNC_INTERVAL
      write_block
    end
  end

  # Return the current position as a value that may be passed to
  # DataFileReader.seek(long). Forces the end of the current block,
  # emitting a synchronization marker.
  def sync
    write_block
    writer.tell
  end

  # Flush the current state of the file, including metadata
  def flush
    write_block
    writer.flush
  end

  # Writes any buffered block and closes the underlying stream.
  def close
    flush
    writer.close
  end

  private

  # Writes the file header: magic, metadata map, sync marker.
  def write_header
    # write magic
    writer.write(MAGIC)

    # write metadata
    datum_writer.write_data(META_SCHEMA, meta, encoder)

    # write sync marker
    writer.write(sync_marker)
  end

  # Emits the buffered datums as one block; does nothing when the buffer
  # holds no datums.
  # TODO(jmhodges): make a schema for blocks and use datum_writer
  # TODO(jmhodges): do we really need the number of items in the block?
  def write_block
    if block_count > 0
      # write number of items in block and block size in bytes
      encoder.write_long(block_count)
      to_write = codec.compress(buffer_writer.string)
      # bytesize, not size: size counts characters, which under-reports
      # the payload length whenever the string is tagged with a multi-byte
      # encoding.
      encoder.write_long(to_write.bytesize)

      # write block contents
      writer.write(to_write)

      # write sync marker
      writer.write(sync_marker)

      # reset buffer
      buffer_writer.truncate(0)
      buffer_writer.rewind
      self.block_count = 0
    end
  end
end
199
+
200
# Read files written by DataFileWriter
class Reader
  include ::Enumerable

  # The reader and binary decoder for the raw file stream
  attr_reader :reader, :decoder

  # The binary decoder for the contents of a block (after codec decompression)
  attr_reader :block_decoder

  attr_reader :datum_reader, :sync_marker, :meta, :file_length, :codec
  attr_accessor :block_count # records remaining in current block

  # reader       - seekable IO-like object positioned anywhere in the file
  # datum_reader - object that deserializes individual datums; its
  #                writers_schema is set from the file's metadata
  #
  # Raises DataFileError when the stream does not start with MAGIC.
  def initialize(reader, datum_reader)
    @reader = reader
    @decoder = IO::BinaryDecoder.new(reader)
    @datum_reader = datum_reader

    # read the header: magic, meta, sync
    read_header

    @codec = DataFile.get_codec(meta['avro.codec'])

    # get ready to read
    @block_count = 0
    datum_reader.writers_schema = Schema.parse meta['avro.schema']
  end

  # Iterates through each datum in this file
  def each
    loop do
      if block_count == 0
        case
        when eof?; break
        when skip_sync
          break if eof?
          read_block_header
        else
          read_block_header
        end

        # A block may declare zero items. There is nothing to decode in
        # it, so go round again for the next sync marker / block header
        # instead of decoding from an empty block and driving the counter
        # negative.
        next if block_count == 0
      end

      datum = datum_reader.read(block_decoder)
      self.block_count -= 1
      yield(datum)
    end
  end

  def eof?; reader.eof?; end

  # Closes the underlying stream.
  def close
    reader.close
  end

  private

  # Reads and validates the magic, then loads the metadata map and the
  # file's sync marker.
  def read_header
    # seek to the beginning of the file to get magic block
    reader.seek(0, 0)

    # check magic number
    magic_in_file = reader.read(MAGIC_SIZE)
    # read returns nil at end of stream, so an empty file is reported the
    # same way as any other file shorter than the magic.
    if magic_in_file.nil? || magic_in_file.size < MAGIC_SIZE
      msg = 'Not an Avro data file: shorter than the Avro magic block'
      raise DataFileError, msg
    elsif magic_in_file != MAGIC
      msg = "Not an Avro data file: #{magic_in_file.inspect} doesn't match #{MAGIC.inspect}"
      raise DataFileError, msg
    end

    # read metadata
    @meta = datum_reader.read_data(META_SCHEMA,
                                   META_SCHEMA,
                                   decoder)
    # read sync marker
    @sync_marker = reader.read(SYNC_SIZE)
  end

  # Reads the item count and byte length of the next block, then
  # decompresses its payload into a fresh block decoder.
  def read_block_header
    self.block_count = decoder.read_long
    block_bytes = decoder.read_long
    data = codec.decompress(reader.read(block_bytes))
    @block_decoder = IO::BinaryDecoder.new(StringIO.new(data))
  end

  # read the length of the sync marker; if it matches the sync
  # marker, return true. Otherwise, seek back to where we started
  # and return false
  def skip_sync
    proposed_sync_marker = reader.read(SYNC_SIZE)
    if proposed_sync_marker != sync_marker
      reader.seek(-SYNC_SIZE, 1)
      false
    else
      true
    end
  end
end
298
+
299
+
300
# Codec named 'null' that returns block data unchanged in both directions.
class NullCodec
  def codec_name
    'null'
  end

  # Returns +data+ itself.
  def decompress(data)
    data
  end

  # Returns +data+ itself.
  def compress(data)
    data
  end
end
305
+
306
# Codec named 'deflate' that compresses block data with zlib, using a
# negative window size for both directions.
class DeflateCodec
  # Compression level handed to Zlib::Deflate.
  attr_reader :level

  def initialize(level=Zlib::DEFAULT_COMPRESSION)
    @level = level
  end

  def codec_name
    'deflate'
  end

  # Inflates +compressed+ and returns the original bytes.
  def decompress(compressed)
    # Passing a negative number to Inflate puts it into "raw" RFC1951 mode
    # (without the RFC1950 header & checksum). See the docs for
    # inflateInit2 in http://www.zlib.net/manual.html
    inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
    inflated = inflater.inflate(compressed)
    inflated << inflater.finish
  ensure
    inflater.close
  end

  # Deflates +data+ at the configured level and returns the compressed bytes.
  def compress(data)
    deflater = Zlib::Deflate.new(level, -Zlib::MAX_WBITS)
    deflated = deflater.deflate(data)
    deflated << deflater.finish
  ensure
    deflater.close
  end
end
334
+
335
# Register the codecs defined in this file so they can be looked up by name.
[NullCodec, DeflateCodec].each do |builtin_codec|
  DataFile.register_codec(builtin_codec)
end

# TODO this constant won't be updated if you register another codec.
# Deprecated in favor of Avro::DataFile::codecs
VALID_CODECS = DataFile.codecs.keys
341
+ end
342
+ end