avro-salsify-fork 1.9.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG +1 -0
- data/LICENSE +203 -0
- data/Manifest +31 -0
- data/NOTICE +6 -0
- data/Rakefile +66 -0
- data/avro-salsify-fork.gemspec +35 -0
- data/avro.gemspec +35 -0
- data/interop/test_interop.rb +41 -0
- data/lib/avro.rb +42 -0
- data/lib/avro/data_file.rb +366 -0
- data/lib/avro/io.rb +619 -0
- data/lib/avro/ipc.rb +551 -0
- data/lib/avro/logical_types.rb +84 -0
- data/lib/avro/protocol.rb +161 -0
- data/lib/avro/schema.rb +434 -0
- data/lib/avro/schema_normalization.rb +83 -0
- data/test/case_finder.rb +87 -0
- data/test/random_data.rb +90 -0
- data/test/sample_ipc_client.rb +85 -0
- data/test/sample_ipc_http_client.rb +84 -0
- data/test/sample_ipc_http_server.rb +79 -0
- data/test/sample_ipc_server.rb +92 -0
- data/test/test_datafile.rb +214 -0
- data/test/test_fingerprints.rb +37 -0
- data/test/test_help.rb +23 -0
- data/test/test_io.rb +451 -0
- data/test/test_logical_types.rb +111 -0
- data/test/test_protocol.rb +199 -0
- data/test/test_schema.rb +146 -0
- data/test/test_schema_normalization.rb +171 -0
- data/test/test_socket_transport.rb +40 -0
- data/test/tool.rb +144 -0
- metadata +114 -0
@@ -0,0 +1,84 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
4
|
+
# distributed with this work for additional information
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
7
|
+
# "License"); you may not use this file except in compliance
|
8
|
+
# with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
# See the License for the specific language governing permissions and
|
16
|
+
# limitations under the License.
|
17
|
+
|
18
|
+
require 'date'
|
19
|
+
|
20
|
+
module Avro
|
21
|
+
module LogicalTypes
|
22
|
+
# Adapter for the `date` logical type: an Avro `int` holding the number of
# days between a date and 1970-01-01.
module IntDate
  EPOCH_START = Date.new(1970, 1, 1)

  # Converts a date into its day offset from EPOCH_START.
  #
  # date - a Date (or anything supporting `date - Date`), or a Numeric that
  #        is already a day count.
  #
  # Returns the day count as an Integer.
  def self.encode(date)
    # An already-encoded value cannot be subtracted from a Date (it would
    # raise), so hand it back unchanged.
    return date.to_i if date.is_a?(Numeric)

    (date - EPOCH_START).to_i
  end

  # Converts a day offset back into a Date.
  def self.decode(int)
    EPOCH_START + int
  end
end
|
33
|
+
|
34
|
+
# Adapter for the `timestamp-millis` logical type: an Avro `long` holding
# milliseconds since the Unix epoch.
module TimestampMillis
  # Converts a time-like value into epoch milliseconds.
  #
  # value - an object responding to #to_time, or a Numeric that is already
  #         a millisecond count.
  #
  # Returns an Integer; sub-millisecond precision is truncated.
  def self.encode(value)
    # Numerics have no #to_time; treat them as already encoded.
    return value.to_i if value.is_a?(Numeric)

    time = value.to_time
    time.to_i * 1000 + time.usec / 1000
  end

  # Converts epoch milliseconds into a UTC Time.
  def self.decode(int)
    # divmod floors, so negative inputs yield a non-negative remainder.
    seconds, millis = int.divmod(1000)
    Time.at(seconds, millis * 1000).utc
  end
end
|
45
|
+
|
46
|
+
# Adapter for the `timestamp-micros` logical type: an Avro `long` holding
# microseconds since the Unix epoch.
module TimestampMicros
  # Converts a time-like value into epoch microseconds.
  #
  # value - an object responding to #to_time, or a Numeric that is already
  #         a microsecond count.
  #
  # Returns an Integer.
  def self.encode(value)
    # Numerics have no #to_time; treat them as already encoded.
    return value.to_i if value.is_a?(Numeric)

    time = value.to_time
    time.to_i * 1_000_000 + time.usec
  end

  # Converts epoch microseconds into a UTC Time.
  def self.decode(int)
    # divmod floors, so negative inputs yield a non-negative remainder.
    seconds, micros = int.divmod(1_000_000)
    Time.at(seconds, micros).utc
  end
end
|
57
|
+
|
58
|
+
# No-op adapter: both directions return the datum they were given.
module Identity
  class << self
    # Returns datum untouched.
    def encode(datum)
      datum
    end

    # Returns datum untouched.
    def decode(datum)
      datum
    end
  end
end
|
67
|
+
|
68
|
+
# Adapter lookup table: Avro type name => logical type name => adapter module.
TYPES = {
  'int' => { 'date' => IntDate },
  'long' => {
    'timestamp-millis' => TimestampMillis,
    'timestamp-micros' => TimestampMicros
  }
}.freeze
|
77
|
+
|
78
|
+
# Looks up the adapter module for a (type, logical type) pair in TYPES.
#
# Returns nil when logical_type is falsy, and Identity when the pair has no
# entry in TYPES.
def self.type_adapter(type, logical_type)
  if logical_type
    adapters_for_type = TYPES.fetch(type) { {} }
    adapters_for_type.fetch(logical_type) { Identity }
  end
end
|
83
|
+
end
|
84
|
+
end
|
@@ -0,0 +1,161 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
module Avro
|
18
|
+
class Protocol
|
19
|
+
VALID_TYPE_SCHEMA_TYPES = Set.new(%w[enum record error fixed])
|
20
|
+
VALID_TYPE_SCHEMA_TYPES_SYM = Set.new(VALID_TYPE_SCHEMA_TYPES.map(&:to_sym))
|
21
|
+
# Raised when a protocol definition cannot be parsed or fails validation.
class ProtocolParseError < Avro::AvroError; end
|
22
|
+
|
23
|
+
attr_reader :name, :namespace, :types, :messages, :md5
|
24
|
+
# Builds a Protocol from its JSON text.
#
# Raises ProtocolParseError when the decoded JSON is not an object.
def self.parse(protocol_string)
  json_data = MultiJson.load(protocol_string)
  unless json_data.is_a?(Hash)
    raise ProtocolParseError, "Not a JSON object: #{json_data}"
  end

  ctor_args = %w[protocol namespace types messages].map { |key| json_data[key] }
  Protocol.new(*ctor_args)
end
|
37
|
+
|
38
|
+
# Validates the constructor arguments, parses the declared types and
# messages, and records the MD5 digest of the protocol's JSON form.
#
# Raises ProtocolParseError when name is missing or not a String, when
# namespace is not a String (nil, the parameter default, is rejected too),
# when types is not an Array, or when messages is not a Hash.
def initialize(name, namespace=nil, types=nil, messages=nil)
  raise ProtocolParseError, 'Protocols must have a non-empty name.' unless name
  raise ProtocolParseError, 'The name property must be a string.' unless name.is_a?(String)
  raise ProtocolParseError, 'The namespace property must be a string.' unless namespace.is_a?(String)
  raise ProtocolParseError, 'The types property must be a list.' unless types.is_a?(Array)
  raise ProtocolParseError, 'The messages property must be a JSON object.' unless messages.is_a?(Hash)

  @name = name
  @namespace = namespace
  # Shared registry: filled while parsing types, then consulted by messages.
  known_names = {}
  @types = parse_types(types, known_names)
  @messages = parse_messages(messages, known_names)
  @md5 = Digest::MD5.digest(to_s)
end
|
59
|
+
|
60
|
+
# Serializes the protocol's Avro representation to a JSON string.
def to_s
  avro_form = to_avro
  MultiJson.dump(avro_form)
end
|
63
|
+
|
64
|
+
# Two protocols are equal when their Avro representations are equal.
#
# Anything that is not a Protocol compares as unequal instead of raising
# NoMethodError on the protected #to_avro call.
def ==(other)
  other.is_a?(Protocol) && to_avro == other.to_avro
end
|
67
|
+
|
68
|
+
private
|
69
|
+
# Parses each declared type into a schema object, registering named types
# in type_names as a side effect of Schema.real_parse.
#
# Raises ProtocolParseError for any type outside VALID_TYPE_SCHEMA_TYPES_SYM.
def parse_types(types, type_names)
  types.map do |type_json|
    # FIXME adding type.name to type_names is not defined in the
    # spec. Possible bug in the python impl and the spec.
    parsed = Schema.real_parse(type_json, type_names, namespace)
    next parsed if VALID_TYPE_SCHEMA_TYPES_SYM.include?(parsed.type_sym)

    raise ProtocolParseError, "Type #{type_json} not an enum, record, fixed or error."
  end
end
|
82
|
+
|
83
|
+
# Turns the raw messages object into a Hash of message name => Message.
#
# Raises ProtocolParseError when a name repeats or a body is not a Hash.
def parse_messages(messages, names)
  messages.each_with_object({}) do |(message_name, body), parsed|
    if parsed.key?(message_name)
      raise ProtocolParseError, "Message name \"#{message_name}\" repeated."
    end
    unless body.is_a?(Hash)
      raise ProtocolParseError, "Message name \"#{message_name}\" has non-object body #{body.inspect}"
    end

    parsed[message_name] = Message.new(
      message_name, body['request'], body['response'], body['errors'], names, namespace
    )
  end
end
|
99
|
+
|
100
|
+
protected
|
101
|
+
# Builds the Hash form of the protocol. `names` collects the named types
# already emitted and is handed to every nested #to_avro call.
def to_avro(names=Set.new)
  avro = {'protocol' => name}
  avro['namespace'] = namespace if namespace
  avro['types'] = types.map { |schema| schema.to_avro(names) } if types

  if messages
    avro['messages'] = messages.each_with_object({}) do |(message_name, message), dumped|
      dumped[message_name] = message.to_avro(names)
    end
  end

  avro
end
|
112
|
+
|
113
|
+
# A single protocol message: a request record, a response schema and an
# optional errors union.
class Message
  attr_reader :name, :request, :response, :errors, :default_namespace

  # name              - the message name.
  # request           - Array of field definitions for the request record.
  # response          - response schema JSON, or the name of a known type.
  # errors            - optional Array of error schemas.
  # names             - registry of named schemas shared with the protocol.
  # default_namespace - namespace used to resolve unqualified names.
  def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil)
    @name = name
    @default_namespace = default_namespace
    @request = parse_request(request, names)
    @response = parse_response(response, names)
    @errors = parse_errors(errors, names) if errors
  end

  # Hash form of the message; 'errors' is only present when errors were given.
  def to_avro(names=Set.new)
    avro = {
      'request' => request.to_avro(names),
      'response' => response.to_avro(names)
    }
    avro['errors'] = errors.to_avro(names) if errors
    avro
  end

  # JSON form of the message. Uses MultiJson like the rest of the library
  # (the previous Yajl call fails unless Yajl happens to be loaded).
  def to_s
    MultiJson.dump to_avro
  end

  # Wraps the request field list in an anonymous :request record schema.
  def parse_request(request, names)
    unless request.is_a?(Array)
      raise ProtocolParseError, "Request property not an Array: #{request.inspect}"
    end
    Schema::RecordSchema.new(nil, default_namespace, request, names, :request)
  end

  # Resolves the response to an already-registered named schema when it is
  # a String found in names; otherwise parses it as a schema.
  def parse_response(response, names)
    if response.is_a?(String) && names
      fullname = Name.make_fullname(response, default_namespace)
      return names[fullname] if names.include?(fullname)
    end

    Schema.real_parse(response, names, default_namespace)
  end

  # Parses the errors Array through Schema.real_parse.
  def parse_errors(errors, names)
    unless errors.is_a?(Array)
      raise ProtocolParseError, "Errors property not an Array: #{errors}"
    end
    Schema.real_parse(errors, names, default_namespace)
  end
end
|
160
|
+
end
|
161
|
+
end
|
data/lib/avro/schema.rb
ADDED
@@ -0,0 +1,434 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
require 'avro/logical_types'
|
18
|
+
|
19
|
+
module Avro
|
20
|
+
class Schema
|
21
|
+
# Sets of strings, for backwards compatibility. See below for sets of symbols,
|
22
|
+
# for better performance.
|
23
|
+
PRIMITIVE_TYPES = Set.new(%w[null boolean string bytes int long float double])
|
24
|
+
NAMED_TYPES = Set.new(%w[fixed enum record error])
|
25
|
+
|
26
|
+
VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request])
|
27
|
+
|
28
|
+
PRIMITIVE_TYPES_SYM = Set.new(PRIMITIVE_TYPES.map(&:to_sym))
|
29
|
+
NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
|
30
|
+
VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
|
31
|
+
|
32
|
+
INT_MIN_VALUE = -(1 << 31)
|
33
|
+
INT_MAX_VALUE = (1 << 31) - 1
|
34
|
+
LONG_MIN_VALUE = -(1 << 63)
|
35
|
+
LONG_MAX_VALUE = (1 << 63) - 1
|
36
|
+
|
37
|
+
# Parses a JSON string into a schema object, starting from an empty
# registry of named types.
def self.parse(json_string)
  parsed_json = MultiJson.load(json_string)
  real_parse(parsed_json, {})
end
|
40
|
+
|
41
|
+
# Build an Avro schema object from already-decoded JSON data.
#
# json_obj          - Hash (full definition), Array (union) or String
#                     (primitive type name).
# names             - registry of named schemas, passed on to the
#                     constructors.
# default_namespace - namespace applied when a named type declares none.
#
# Raises SchemaParseError for a missing or unknown "type", and
# UnknownSchemaError for anything that is not a Hash, Array or primitive name.
def self.real_parse(json_obj, names=nil, default_namespace=nil)
  case json_obj
  when Hash
    type = json_obj['type']
    logical_type = json_obj['logicalType']
    raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?

    # Check that the type is valid before calling #to_sym, since symbols are never garbage
    # collected (important to avoid DoS if we're accepting schemas from untrusted clients)
    raise SchemaParseError, "Unknown type: #{type}" unless VALID_TYPES.include?(type)

    type_sym = type.to_sym
    return PrimitiveSchema.new(type_sym, logical_type) if PRIMITIVE_TYPES_SYM.include?(type_sym)

    if NAMED_TYPES_SYM.include?(type_sym)
      name = json_obj['name']
      namespace = json_obj.fetch('namespace', default_namespace)
      case type_sym
      when :fixed
        FixedSchema.new(name, namespace, json_obj['size'], names, logical_type)
      when :enum
        EnumSchema.new(name, namespace, json_obj['symbols'], names)
      when :record, :error
        RecordSchema.new(name, namespace, json_obj['fields'], names, type_sym)
      else
        raise SchemaParseError.new("Unknown named type: #{type}")
      end
    else
      case type_sym
      when :array
        ArraySchema.new(json_obj['items'], names, default_namespace)
      when :map
        MapSchema.new(json_obj['values'], names, default_namespace)
      else
        raise SchemaParseError.new("Unknown Valid Type: #{type}")
      end
    end
  when Array
    # JSON array (union)
    UnionSchema.new(json_obj, names, default_namespace)
  else
    raise UnknownSchemaError.new(json_obj) unless PRIMITIVE_TYPES.include?(json_obj)

    PrimitiveSchema.new(json_obj)
  end
end
|
95
|
+
|
96
|
+
# Determine if a ruby datum is an instance of a schema.
#
# expected_schema - schema object; only #type_sym and the accessors relevant
#                   to that type are used.
# datum           - the Ruby value to check.
#
# Returns true/false. Raises RuntimeError for an unrecognized type_sym.
def self.validate(expected_schema, datum)
  case expected_schema.type_sym
  when :null
    datum.nil?
  when :boolean
    datum == true || datum == false
  when :string, :bytes
    datum.is_a?(String)
  when :int
    # Integer covers the former Fixnum/Bignum, which no longer exist on
    # current Rubies.
    datum.is_a?(Integer) &&
      (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
  when :long
    datum.is_a?(Integer) &&
      (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
  when :float, :double
    datum.is_a?(Float) || datum.is_a?(Integer)
  when :fixed
    datum.is_a?(String) && datum.bytesize == expected_schema.size
  when :enum
    expected_schema.symbols.include?(datum)
  when :array
    datum.is_a?(Array) &&
      datum.all? { |d| validate(expected_schema.items, d) }
  when :map
    # Non-Hash data must simply fail validation (e.g. while probing the
    # branches of a union) rather than raise NoMethodError on #keys.
    datum.is_a?(Hash) &&
      datum.keys.all? { |k| k.is_a?(String) } &&
      datum.values.all? { |v| validate(expected_schema.values, v) }
  when :union
    expected_schema.schemas.any? { |s| validate(s, datum) }
  when :record, :error, :request
    datum.is_a?(Hash) &&
      expected_schema.fields.all? { |f| validate(f.type, datum[f.name]) }
  else
    raise "you suck #{expected_schema.inspect} is not allowed."
  end
end
|
132
|
+
|
133
|
+
# Stores the schema type as a Symbol (converting via #to_sym when needed)
# together with the optional logical type.
def initialize(type, logical_type=nil)
  @type_sym =
    if type.is_a?(Symbol)
      type
    else
      type.to_sym
    end
  @logical_type = logical_type
end
|
137
|
+
|
138
|
+
attr_reader :type_sym
|
139
|
+
attr_reader :logical_type
|
140
|
+
|
141
|
+
# Returns the schema type as a String instead of a Symbol. Kept for
# backwards compatibility; deprecated in favor of {#type_sym}.
def type
  @type_sym.to_s
end
|
144
|
+
|
145
|
+
# Memoized adapter for this schema's (type, logical type) pair; falls back
# to LogicalTypes::Identity when the lookup returns nothing.
def type_adapter
  return @type_adapter if @type_adapter

  @type_adapter = LogicalTypes.type_adapter(type, logical_type) || LogicalTypes::Identity
end
|
148
|
+
|
149
|
+
# Returns the MD5 digest of the schema's parsing form, as an Integer.
def md5_fingerprint
  Digest::MD5.hexdigest(SchemaNormalization.to_parsing_form(self)).hex
end
|
154
|
+
|
155
|
+
# Returns the SHA-256 digest of the schema's parsing form, as an Integer.
def sha256_fingerprint
  Digest::SHA256.hexdigest(SchemaNormalization.to_parsing_form(self)).hex
end
|
160
|
+
|
161
|
+
# Schemas compare equal when the other object is a Schema with the same
# type symbol. `seen` is accepted but not used here.
def ==(other, seen=nil)
  return false unless other.is_a?(Schema)

  type_sym == other.type_sym
end
|
164
|
+
|
165
|
+
# Hash code derived from the type symbol alone. `seen` is accepted but not
# used here.
def hash(seen=nil)
  type_key = type_sym
  type_key.hash
end
|
168
|
+
|
169
|
+
# Resolves a nested schema: a String naming an already-registered type is
# looked up in names; everything else goes through Schema.real_parse.
#
# SchemaParseError from the nested parse is re-raised as is; any other
# StandardError is replaced by a SchemaParseError describing the bad schema.
def subparse(json_obj, names=nil, namespace=nil)
  if json_obj.is_a?(String) && names
    fullname = Name.make_fullname(json_obj, namespace)
    return names[fullname] if names.include?(fullname)
  end

  begin
    Schema.real_parse(json_obj, names, namespace)
  rescue SchemaParseError
    raise
  rescue
    raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
  end
end
|
182
|
+
|
183
|
+
# Hash form of the schema: always the 'type', plus 'logicalType' when one
# is set, so a logical type survives serialization instead of being
# silently dropped. `names` is accepted but not used here.
def to_avro(names=nil)
  props = {'type' => type}
  props['logicalType'] = logical_type if logical_type
  props
end
|
186
|
+
|
187
|
+
# Serializes the schema's Avro representation to a JSON string.
def to_s
  avro_form = to_avro
  MultiJson.dump(avro_form)
end
|
190
|
+
|
191
|
+
# Base class for schemas that carry a name and an optional namespace.
class NamedSchema < Schema
  attr_reader :name, :namespace

  # Splits a dotted name into name/namespace and registers the new schema
  # in the names registry through Name.add_name.
  def initialize(type, name, namespace=nil, names=nil, logical_type=nil)
    super(type, logical_type)
    @name, @namespace = Name.extract_namespace(name, namespace)
    Name.add_name(names, self)
  end

  # Returns just the full name when this schema was already emitted
  # (tracked in names); otherwise records it and returns the full Hash form.
  def to_avro(names=Set.new)
    if @name
      qualified = fullname
      return qualified if names.include?(qualified)

      names << qualified
    end

    named_props = {'name' => @name}
    named_props['namespace'] = @namespace if @namespace
    super.merge(named_props)
  end

  # Memoized name as built by Name.make_fullname.
  def fullname
    @fullname ||= Name.make_fullname(@name, @namespace)
  end
end
|
213
|
+
|
214
|
+
# Schema for record, error and (anonymous) request types.
class RecordSchema < NamedSchema
  attr_reader :fields

  # Builds Field objects from raw field definitions.
  #
  # Raises SchemaParseError when an entry does not respond to #[] or when a
  # field name is used twice.
  def self.make_field_objects(field_data, names, namespace=nil)
    seen_names = Set.new
    field_data.map do |field|
      # TODO(jmhodges) wtffffff
      raise SchemaParseError, "Not a valid field: #{field}" unless field.respond_to?(:[])

      new_field = Field.new(
        field['type'], field['name'], field['default'], field['order'], names, namespace
      )
      # make sure field name has not been used yet
      if seen_names.include?(new_field.name)
        raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
      end

      seen_names << new_field.name
      new_field
    end
  end

  # Request schemas skip the NamedSchema constructor (they have no name to
  # register); every other type goes through it.
  def initialize(name, namespace, fields, names=nil, schema_type=:record)
    anonymous_request = schema_type == :request || schema_type == 'request'
    if anonymous_request
      @type_sym = schema_type.to_sym
      @namespace = namespace
    else
      super(schema_type, name, namespace, names)
    end
    @fields = RecordSchema.make_field_objects(fields, names, self.namespace)
  end

  # Memoized Hash of field name => Field.
  def fields_hash
    @fields_hash ||= fields.each_with_object({}) { |field, by_name| by_name[field.name] = field }
  end

  # Hash form including 'fields'. A request schema returns only the fields
  # list; a schema already present in names returns whatever super returned.
  def to_avro(names=Set.new)
    avro = super
    return avro unless avro.is_a?(Hash)

    avro['fields'] = @fields.map { |field| field.to_avro(names) }
    type_sym == :request ? avro['fields'] : avro
  end
end
|
264
|
+
|
265
|
+
# Schema for Avro arrays; `items` is the parsed element schema.
class ArraySchema < Schema
  attr_reader :items

  def initialize(items, names=nil, default_namespace=nil)
    super(:array)
    @items = subparse(items, names, default_namespace)
  end

  # Hash form with the element schema under 'items'.
  def to_avro(names=Set.new)
    base = super
    base.merge('items' => items.to_avro(names))
  end
end
|
277
|
+
|
278
|
+
# Schema for Avro maps; `values` is the parsed value schema.
class MapSchema < Schema
  attr_reader :values

  def initialize(values, names=nil, default_namespace=nil)
    super(:map)
    @values = subparse(values, names, default_namespace)
  end

  # Hash form with the value schema under 'values'.
  def to_avro(names=Set.new)
    base = super
    base.merge('values' => values.to_avro(names))
  end
end
|
290
|
+
|
291
|
+
# Schema for Avro unions; `schemas` holds the parsed branch schemas in
# declaration order.
class UnionSchema < Schema
  attr_reader :schemas

  # Parses every branch and enforces the union rules checked here: no
  # repeated non-named type and no nested union.
  #
  # @schemas is assigned from the accumulator after the loop, so an empty
  # union yields [] rather than leaving the attribute nil.
  def initialize(schemas, names=nil, default_namespace=nil)
    super(:union)

    @schemas = schemas.each_with_object([]) do |schema, schema_objects|
      new_schema = subparse(schema, names, default_namespace)
      ns_type = new_schema.type_sym

      if VALID_TYPES_SYM.include?(ns_type) &&
         !NAMED_TYPES_SYM.include?(ns_type) &&
         schema_objects.any? { |o| o.type_sym == ns_type }
        raise SchemaParseError, "#{ns_type} is already in Union"
      elsif ns_type == :union
        raise SchemaParseError, "Unions cannot contain other unions"
      end

      schema_objects << new_schema
    end
  end

  # A union serializes as the Array of its branches' Avro forms.
  def to_avro(names=Set.new)
    schemas.map { |schema| schema.to_avro(names) }
  end
end
|
319
|
+
|
320
|
+
# Schema for Avro enums; `symbols` is the list of allowed values.
class EnumSchema < NamedSchema
  attr_reader :symbols

  # Raises Avro::SchemaParseError when symbols contains duplicates. The
  # message lists the repeated symbols themselves; the previous
  # `'%s' % symbols` formatted only the first array element.
  def initialize(name, space, symbols, names=nil)
    duplicates = symbols.group_by { |sym| sym }.select { |_, group| group.size > 1 }.keys
    unless duplicates.empty?
      raise Avro::SchemaParseError, "Duplicate symbol: #{duplicates.join(', ')}"
    end

    super(:enum, name, space, names)
    @symbols = symbols
  end

  # Adds 'symbols' to the Hash form; a bare name reference from super is
  # returned unchanged.
  def to_avro(names=Set.new)
    avro = super
    avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
  end
end
|
336
|
+
|
337
|
+
# Schema for the primitive types listed in PRIMITIVE_TYPES.
class PrimitiveSchema < Schema
  # Accepts the type as a Symbol or a String.
  #
  # Raises AvroError when type is not a primitive type.
  def initialize(type, logical_type=nil)
    primitive_sym =
      if PRIMITIVE_TYPES_SYM.include?(type)
        type
      elsif PRIMITIVE_TYPES.include?(type)
        type.to_sym
      else
        raise AvroError.new("#{type} is not a valid primitive type.")
      end
    super(primitive_sym, logical_type)
  end

  # Collapses to the bare type name when the Hash form holds nothing but
  # the type; otherwise returns the Hash.
  def to_avro(names=nil)
    avro = super
    return type if avro.size == 1

    avro
  end
end
|
354
|
+
|
355
|
+
# Schema for Avro fixed types; `size` is the declared size.
class FixedSchema < NamedSchema
  attr_reader :size

  # Raises AvroError unless size is an Integer. Integer is used instead of
  # Fixnum/Bignum, which no longer exist on current Rubies.
  def initialize(name, space, size, names=nil, logical_type=nil)
    unless size.is_a?(Integer)
      raise AvroError, 'Fixed Schema requires a valid integer for size property.'
    end

    super(:fixed, name, space, names, logical_type)
    @size = size
  end

  # Adds 'size' to the Hash form; a bare name reference from super is
  # returned unchanged.
  def to_avro(names=Set.new)
    avro = super
    avro.is_a?(Hash) ? avro.merge('size' => size) : avro
  end
end
|
371
|
+
|
372
|
+
class Field < Schema
|
373
|
+
attr_reader :type, :name, :default, :order
|
374
|
+
|
375
|
+
# Parses the field's type through #subparse and stores the remaining
# attributes as given.
def initialize(type, name, default=nil, order=nil, names=nil, namespace=nil)
  @type = subparse(type, names, namespace)
  @name, @default, @order = name, default, order
end
|
381
|
+
|
382
|
+
# Hash form of the field: 'name' and 'type', plus 'default' and 'order'
# when set.
#
# 'default' is emitted for any non-nil value, so a default of `false` is
# kept instead of being dropped by a truthiness check.
def to_avro(names=Set.new)
  avro = {'name' => name, 'type' => type.to_avro(names)}
  avro['default'] = default unless default.nil?
  avro['order'] = order if order
  avro
end
|
388
|
+
end
|
389
|
+
end
|
390
|
+
|
391
|
+
# Raised when a schema definition cannot be parsed or fails validation.
class SchemaParseError < AvroError; end
|
392
|
+
|
393
|
+
# Raised for a schema value that is not recognized; the offending value is
# kept in #type_name.
class UnknownSchemaError < SchemaParseError
  attr_reader :type_name

  def initialize(type)
    message = "#{type.inspect} is not a schema we know about."
    super(message)
    @type_name = type
  end
end
|
401
|
+
|
402
|
+
# Helpers for handling the names and namespaces of named schemas.
module Name
  # Splits a dotted name into [simple name, namespace]. A name without
  # dots is returned unchanged together with the given namespace.
  def self.extract_namespace(name, namespace)
    *namespace_parts, simple_name = name.split('.')
    return [name, namespace] if namespace_parts.empty?

    [simple_name, namespace_parts.join('.')]
  end

  # Add a new schema object to the names dictionary (in place). A nil
  # dictionary is replaced by a fresh Hash. Returns the dictionary.
  #
  # Raises SchemaParseError when the full name is a reserved type name or
  # is already registered.
  def self.add_name(names, new_schema)
    new_fullname = new_schema.fullname
    if Avro::Schema::VALID_TYPES.include?(new_fullname)
      raise SchemaParseError, "#{new_fullname} is a reserved type name."
    end

    names = {} if names.nil?
    if names.has_key?(new_fullname)
      raise SchemaParseError, "The name \"#{new_fullname}\" is already in use."
    end

    names[new_fullname] = new_schema
    names
  end

  # Prefixes name with namespace unless the name already contains a dot or
  # no namespace is given.
  def self.make_fullname(name, namespace)
    return name if name.include?('.') || namespace.nil?

    namespace + '.' + name
  end
end
|
434
|
+
end
|