schema_registry_client 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/lint.yml +18 -0
- data/.github/workflows/release.yml +31 -0
- data/.github/workflows/test.yml +22 -0
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +39 -0
- data/CHANGELOG.md +12 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +150 -0
- data/LICENSE +20 -0
- data/README.md +48 -0
- data/Rakefile +3 -0
- data/lib/schema_registry_client/avro_schema_store.rb +127 -0
- data/lib/schema_registry_client/cached_confluent_schema_registry.rb +57 -0
- data/lib/schema_registry_client/confluent_schema_registry.rb +118 -0
- data/lib/schema_registry_client/output/json_schema.rb +78 -0
- data/lib/schema_registry_client/output/proto_text.rb +320 -0
- data/lib/schema_registry_client/schema/avro.rb +61 -0
- data/lib/schema_registry_client/schema/base.rb +44 -0
- data/lib/schema_registry_client/schema/proto_json_schema.rb +30 -0
- data/lib/schema_registry_client/schema/protobuf.rb +131 -0
- data/lib/schema_registry_client/version.rb +5 -0
- data/lib/schema_registry_client/wire.rb +30 -0
- data/lib/schema_registry_client.rb +156 -0
- data/schema_registry_client.gemspec +33 -0
- data/spec/decoding_spec.rb +183 -0
- data/spec/encoding_spec.rb +207 -0
- data/spec/gen/everything/everything_pb.rb +26 -0
- data/spec/gen/referenced/referer_pb.rb +24 -0
- data/spec/gen/simple/simple_pb.rb +18 -0
- data/spec/json_schema_spec.rb +12 -0
- data/spec/proto_text_spec.rb +10 -0
- data/spec/schemas/everything/everything.json +328 -0
- data/spec/schemas/everything/everything.proto +105 -0
- data/spec/schemas/referenced/referenced.json +16 -0
- data/spec/schemas/referenced/referer.proto +28 -0
- data/spec/schemas/referenced/v1/MessageBA.avsc +21 -0
- data/spec/schemas/simple/simple.json +12 -0
- data/spec/schemas/simple/simple.proto +12 -0
- data/spec/schemas/simple/v1/SimpleMessage.avsc +11 -0
- data/spec/spec_helper.rb +16 -0
- metadata +46 -9
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class SchemaRegistry
  module Output
    # Builds a JSON Schema (draft 2020-12) document from a protobuf message
    # descriptor proto (an object exposing `name` and `field`).
    module JsonSchema
      class << self
        # Looks up a type in the generated descriptor pool.
        #
        # @param message_name [String] type name, with or without a leading '.'
        # @return [Object, nil] whatever the pool returns for that name
        def fetch(message_name)
          name = message_name.start_with?('.') ? message_name[1..] : message_name
          Google::Protobuf::DescriptorPool.generated_pool.lookup(name)
        end

        # Renders the schema as pretty-printed JSON.
        #
        # Without `path`, every field of `descriptor` becomes a property and
        # every field that is not `proto3_optional` is listed as required.
        # With `path` (dot-separated field names), the leading segments are
        # followed through nested message types and only the final field is
        # emitted (and marked required).
        #
        # @param descriptor [Object] message descriptor proto
        # @param path [String, nil] dot-separated path to a single field
        # @return [String] JSON text
        # @raise [RuntimeError] if a path segment does not name a field, or an
        #   intermediate segment is not a message-typed field
        def output(descriptor, path: nil)
          properties = {}
          result = {
            '$schema': 'https://json-schema.org/draft/2020-12/schema',
            type: 'object',
            properties: properties
          }
          if path
            *parents, field_name = path.split('.')
            parents.each { |part| descriptor = nested_descriptor(descriptor, part) }
            # Fail with a readable message instead of a NoMethodError on nil
            # when the last segment does not exist.
            field = find_field(descriptor, field_name)
            result[:required] = [field_name]
            properties[field_name] = field_object(field)
          else
            result[:required] = descriptor.field.reject(&:proto3_optional).map(&:name)
            descriptor.field.each do |f|
              properties[f.name] = field_object(f)
            end
          end
          JSON.pretty_generate(result)
        end

        # Schema fragment for one field: an `object` with `additionalProperties`
        # for map fields, an `array` for other repeated fields, otherwise the
        # fragment from {#field_type}.
        #
        # @param field [Object] field descriptor proto
        # @param ignore_repeated [Boolean] treat a repeated field as singular
        #   (used to describe the array's item type)
        # @return [Hash]
        def field_object(field, ignore_repeated: false)
          klass = fetch(field.type_name)&.to_proto
          if field.label == :LABEL_REPEATED && !ignore_repeated
            # respond_to? guards against option objects without a map_entry
            # reader (and against nil when the type could not be resolved).
            entry_options = klass&.options
            if entry_options.respond_to?(:map_entry) && entry_options.map_entry
              return {
                type: 'object',
                additionalProperties: field_object(klass.field[1])
              }
            end
            return {
              type: 'array',
              items: field_object(field, ignore_repeated: true)
            }
          end
          field_type(field, klass)
        end

        # Maps a protobuf scalar/enum/message type onto a JSON Schema fragment.
        # 64-bit integers and bytes are described as strings.
        #
        # @param field [Object] field descriptor proto
        # @param klass [Object, nil] resolved descriptor proto of the field's type
        # @return [Hash]
        def field_type(field, klass)
          case field.type
          when :TYPE_INT32, :TYPE_UINT32, :TYPE_SINT32, :TYPE_FIXED32, :TYPE_SFIXED32
            { type: 'integer' }
          when :TYPE_FLOAT, :TYPE_DOUBLE
            { type: 'number' }
          when :TYPE_INT64, :TYPE_UINT64, :TYPE_SINT64, :TYPE_FIXED64, :TYPE_SFIXED64, :TYPE_STRING, :TYPE_BYTES
            { type: 'string' }
          when :TYPE_BOOL
            { type: 'boolean' }
          else
            if klass.is_a?(Google::Protobuf::EnumDescriptorProto)
              { enum: klass.to_h[:value].map { |h| h[:name] } }
            else
              { type: 'object' }
            end
          end
        end

        private

        # Finds a field by name or raises a descriptive error.
        def find_field(descriptor, name)
          field = descriptor.field.find { |f| f.name == name.to_s }
          raise "Field #{name} not found in #{descriptor.name}" unless field

          field
        end

        # Resolves the message type of the field `part` on `descriptor`.
        def nested_descriptor(descriptor, part)
          field = find_field(descriptor, part)
          nested = fetch(field.type_name)&.to_proto
          # Scalars resolve to nil and enums have no fields to descend into.
          raise "Field #{part} in #{descriptor.name} is not a message type" unless nested.respond_to?(:field)

          nested
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class SchemaRegistry
  module Output
    # Renders a file descriptor proto back into .proto source text.
    module ProtoText
      # Rendering state passed between the write_* helpers: the Writer, the
      # file's package name and the message currently being written. The
      # Writer's output methods are delegated so helpers can call them on
      # either object.
      ParseInfo = Struct.new(:writer, :package, :message) do
        %i[write write_indent write_line writenl indent dedent].each do |method|
          define_method(method) do |*args|
            writer.send(method, *args)
          end
        end
      end

      # StringIO that keeps a current indentation width (in spaces).
      class Writer < StringIO
        def initialize(...)
          super
          @indent = 0
        end

        # Writes the current indentation followed by `str` (no newline).
        def write_indent(str)
          @indent.times { write(' ') }
          write(str)
        end

        # Writes an indented line followed by `newline` line breaks.
        def write_line(line, newline = 1)
          write_indent(line)
          newline.times { writenl }
        end

        def writenl
          write("\n")
        end

        def indent
          @indent += 2
        end

        def dedent
          @indent -= 2
        end
      end

      class << self
        # Looks up a type or extension in the generated descriptor pool.
        #
        # @param message_name [String] name, with or without a leading '.'
        def fetch(message_name)
          name = message_name.start_with?('.') ? message_name[1..] : message_name
          Google::Protobuf::DescriptorPool.generated_pool.lookup(name)
        end

        # Renders a whole file: syntax, package, options, imports, extensions,
        # enums, messages and services, in that order.
        #
        # @param file_descriptor [Object] file descriptor proto
        # @return [String] the .proto text
        def output(file_descriptor)
          writer = Writer.new
          info = ParseInfo.new(writer, file_descriptor.package)
          writer.write_line("syntax = \"#{file_descriptor.syntax}\";", 2)
          writer.write_line("package #{file_descriptor.package};")
          writer.writenl
          found = false
          file_descriptor.options.to_h.each do |name, value|
            found = true
            writer.write_line("option #{name} = #{value.to_json};")
          end
          writer.writenl if found

          found = false
          file_descriptor.dependency.each do |dependency|
            found = true
            writer.write_line("import \"#{dependency}\";")
          end
          writer.writenl if found

          writer.writenl if write_options(info, file_descriptor)
          writer.writenl if write_extensions(info, file_descriptor)

          file_descriptor.enum_type.each do |enum_type|
            write_enum(info, enum_type)
          end
          file_descriptor.message_type.each do |message_type|
            write_message(info, message_type)
          end
          file_descriptor.service.each do |service|
            write_service(info, service)
          end
          writer.string
        end

        # Writes one `extend X { ... }` block per extension.
        #
        # @return [Boolean] true if the descriptor declares any extension
        def write_extensions(info, descriptor)
          descriptor.extension.each do |extension|
            info.write_line("extend #{extension.extendee[1..]} {")
            info.indent
            write_field(info, extension)
            info.dedent
            info.write_line('}')
          end
          descriptor.extension.any?
        end

        # Writes `reserved` statements for numeric ranges and names, followed
        # by a blank line when anything was written.
        #
        # Message reserved ranges store an exclusive `end`; enum reserved
        # ranges store an inclusive `end` (see descriptor.proto), so only the
        # former is decremented before printing.
        def write_reserved(writer, descriptor)
          inclusive_end = descriptor.is_a?(Google::Protobuf::EnumDescriptorProto)
          ranges = descriptor.reserved_range.map do |range|
            last = inclusive_end ? range.end : range.end - 1
            range.start == last ? range.start.to_s : "#{range.start} to #{last}"
          end
          names = descriptor.reserved_name.map(&:to_json)

          writer.write_line("reserved #{ranges.join(', ')};") if ranges.any?
          writer.write_line("reserved #{names.join(', ')};") if names.any?
          writer.writenl if ranges.any? || names.any?
        end

        # Writes import statements surrounded by blank lines.
        #
        # NOTE(review): not called from #output in this file. descriptor.proto
        # declares public_dependency as indexes into `dependency`, so the
        # `import public` lines below likely print numbers rather than paths,
        # and option_dependency is emitted as `import weak` - confirm intent
        # before relying on this method.
        def write_imports(writer, file_descriptor)
          writer.writenl
          file_descriptor.dependency.each do |dependency|
            writer.write_line("import \"#{dependency}\";")
          end
          file_descriptor.public_dependency.each do |public_dependency|
            writer.write_line("import public \"#{public_dependency}\";")
          end
          file_descriptor.option_dependency.each do |option_dependency|
            writer.write_line("import weak \"#{option_dependency}\";")
          end
          writer.writenl
        end

        # Writes a message definition, recursing into nested types. Nested
        # map-entry types are skipped because map fields are rendered inline
        # as `map<K, V>`.
        def write_message(info, message_type)
          info.message = message_type
          info.write_indent('message ')
          info.write("#{message_type.name} {")
          info.writenl
          info.indent

          write_options(info, message_type)
          write_reserved(info, message_type)

          message_type.enum_type.each do |enum|
            info.writenl
            write_enum(info, enum)
          end
          message_type.field.each do |field|
            write_field(info, field)
          end
          message_type.extension.each do |extension|
            write_field(info, extension)
          end
          write_oneofs(info, message_type)
          message_type.nested_type.each do |subtype|
            next if subtype.options&.map_entry

            info.writenl
            write_message(info, subtype)
          end
          info.dedent
          info.write_line('}')
        end

        # Returns the .proto spelling of a field's type. Enum and message
        # names are made relative to the current package (and current message,
        # when one is set).
        #
        # @return [String, nil] nil for types this method does not handle
        def field_type(info, field_type)
          case field_type.type
          when :TYPE_INT32
            'int32'
          when :TYPE_INT64
            'int64'
          when :TYPE_UINT32
            'uint32'
          when :TYPE_UINT64
            'uint64'
          when :TYPE_SINT32
            'sint32'
          when :TYPE_SINT64
            'sint64'
          when :TYPE_FIXED32
            'fixed32'
          when :TYPE_FIXED64
            'fixed64'
          when :TYPE_SFIXED32
            'sfixed32'
          when :TYPE_SFIXED64
            'sfixed64'
          when :TYPE_FLOAT
            'float'
          when :TYPE_DOUBLE
            'double'
          when :TYPE_BOOL
            'bool'
          when :TYPE_STRING
            'string'
          when :TYPE_BYTES
            'bytes'
          when :TYPE_ENUM, :TYPE_MESSAGE
            # remove leading .
            type = fetch(field_type.type_name[1..])
            # Strip only true prefixes: an unanchored substitution would also
            # remove "pkg." from the middle of a foreign name, and with an
            # empty package would delete the first '.' it finds.
            package_prefix = info.package.to_s.empty? ? '' : "#{info.package}."
            name = type.name
            # info.message is nil while file-level extensions are written.
            name = name.delete_prefix("#{package_prefix}#{info.message.name}.") if info.message
            name.delete_prefix(package_prefix)
          end
        end

        # Writes one field declaration line.
        #
        # Members of a real oneof are skipped unless `oneof: true` (they are
        # written by #write_oneofs). proto3 `optional` fields also carry a
        # oneof index - for their synthetic oneof, which #write_oneofs skips -
        # so they must be written here.
        def write_field(info, field, oneof: false)
          return if !oneof && field.has_oneof_index? && !field.proto3_optional

          info.write_indent('')

          klass = nil
          klass = fetch(field.type_name).to_proto if field.type_name && field.type_name != ''
          map_field = map_entry?(klass)

          if field.proto3_optional
            info.write('optional ')
          elsif field.label == :LABEL_REPEATED && !map_field
            info.write('repeated ')
          end

          if map_field
            info.write("map<#{field_type(info, klass.field[0])}, #{field_type(info, klass.field[1])}>")
          else
            info.write(field_type(info, field).to_s)
          end
          info.write(" #{field.name} = #{field.number}")

          write_field_options(info, field)
          info.write(';')
          info.writenl
        end

        # Writes the bracketed option list of a field, if it has options.
        def write_field_options(info, field)
          return unless field.options

          info.write(' [')
          info.write(field.options.to_h.map { |name, value| "#{name} = #{value}" }.join(', '))
          write_options(info, field, include_option_label: false)
          info.write(']')
        end

        # Writes every real oneof of `message` with its member fields.
        def write_oneofs(info, message)
          message.oneof_decl.each_with_index do |oneof, i|
            # synthetic oneof for proto3 optional fields
            next if oneof.name.start_with?('_') &&
                    message.field.any? { |f| f.proto3_optional && f.name == oneof.name[1..] }

            info.write_line("oneof #{oneof.name} {")
            info.indent
            message.field.select { |f| f.has_oneof_index? && f.oneof_index == i }.each do |field|
              write_field(info, field, oneof: true)
            end
            info.dedent
            info.write_line('}')
          end
        end

        # Writes an enum definition followed by a blank line.
        #
        # NOTE(review): the opening line is written without indentation, so a
        # nested enum's first line is not indented like its body; left as-is
        # because it changes the generated text.
        def write_enum(info, enum_type)
          info.write('enum ')
          info.write("#{enum_type.name} {")
          info.writenl
          info.indent
          write_reserved(info, enum_type)
          enum_type.value.each do |value|
            info.write_line("#{value.name} = #{value.number};")
          end
          info.dedent
          info.write_line('}')
          info.writenl
        end

        # Makes an RPC input/output type name relative to `package` and drops
        # a leading '.'.
        def method_type(package, name)
          output = name.sub("#{package}.", '')
          output = output[1..] if output.start_with?('.')
          output
        end

        # Writes a service definition with one `rpc` block per method.
        def write_service(info, service)
          info.write_line("service #{service.name} {")
          info.indent
          # Indexed access because `method` would resolve to Object#method.
          service['method'].each do |rpc|
            info.write_indent("rpc #{rpc.name}(#{method_type(info.package, rpc.input_type)}) ")
            info.write("returns (#{method_type(info.package, rpc.output_type)}) {")
            info.writenl
            info.indent
            write_options(info, rpc) if rpc.options
            info.dedent
            info.write_line('};')
          end
          info.dedent
          info.write_line('}')
        end

        # Writes the extension options that are set on `descriptor`.
        #
        # @param include_option_label [Boolean] prefix each entry with `option`
        #   (statement form) instead of the bare `(name) = ` form
        # @return [Boolean] true if any options were written
        def write_options(info, descriptor, include_option_label: true)
          # unfortunately there doesn't seem to be a way to get the full list of options without
          # resorting to to_json.
          json = JSON.parse(descriptor.options.to_json)
          return false if !json || json.empty?

          found = false
          json.each_key do |name|
            option_name = name.tr('[]', '')
            ext = fetch(option_name)
            # Keys that are not registered in the pool are skipped.
            next if ext.nil?

            found = true
            options = ext.get(descriptor.options)
            if include_option_label
              info.write_indent("option (#{option_name}) =")
            else
              info.write("(#{option_name}) = ")
            end
            if options.respond_to?(:to_h)
              lines = JSON.pretty_generate(options.to_h).lines(chomp: true)
              lines.each_with_index do |line, i|
                info.write_indent(line)
                info.writenl if i < lines.length - 1
              end
              info.write(';')
            else
              info.write(options.to_json)
            end
            info.writenl
          end
          found
        end

        private

        # True when `klass` is a synthesized map-entry message. Enum option
        # objects have no map_entry reader, hence the respond_to? check.
        def map_entry?(klass)
          options = klass&.options
          options.respond_to?(:map_entry) && options.map_entry ? true : false
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'schema_registry_client/schema/base'
|
|
4
|
+
require 'schema_registry_client/avro_schema_store'
|
|
5
|
+
|
|
6
|
+
class SchemaRegistry
  module Schema
    # Avro implementation of the schema interface, backed by a local
    # AvroSchemaStore.
    class Avro < Base
      DEFAULT_SCHEMAS_PATH = './schemas'

      # @return [String] the schema type identifier for this format
      def self.schema_type
        'AVRO'
      end

      # Returns the memoized schema store, built from
      # SchemaRegistry.avro_schema_path (or DEFAULT_SCHEMAS_PATH).
      # `load_schemas!` is invoked on every call, not only on creation.
      #
      # @return [SchemaRegistry::AvroSchemaStore]
      def self.schema_store
        @schema_store ||= SchemaRegistry::AvroSchemaStore.new(
          path: SchemaRegistry.avro_schema_path || DEFAULT_SCHEMAS_PATH
        )
        @schema_store.tap(&:load_schemas!)
      end

      # @param _message [Object] unused
      # @param schema_name [String] name handed to the store's `find_text`
      # @return [Object] the store's result for `schema_name`
      def self.schema_text(_message, schema_name: nil)
        schema_store.find_text(schema_name)
      end

      # Validates `message` against the named schema, then writes it to
      # `stream` in Avro binary encoding.
      #
      # @param message [Object] datum to encode
      # @param stream [#write] destination
      # @param schema_name [String] schema to look up in the store
      def self.encode(message, stream, schema_name: nil)
        avro_schema = schema_store.find(schema_name)

        ::Avro::SchemaValidator.validate!(
          avro_schema,
          message,
          recursive: true,
          encoded: false,
          fail_on_extra_fields: true
        )

        binary_encoder = ::Avro::IO::BinaryEncoder.new(stream)
        ::Avro::IO::DatumWriter.new(avro_schema).write(message, binary_encoder)
      end

      # Decodes an Avro binary datum from `stream`.
      #
      # @param stream [#read] source positioned at the start of the datum
      # @param schema_text [String] writer schema JSON from the registry
      # @return [Object] the decoded datum
      def self.decode(stream, schema_text)
        # The parsed value is discarded; the call is retained so malformed
        # JSON fails here, before the Avro parser runs.
        JSON.parse(schema_text)
        written_with = ::Avro::Schema.parse(schema_text)

        binary_decoder = ::Avro::IO::BinaryDecoder.new(stream)

        # Prefer the locally stored schema of the same full name as reader
        # schema; any StandardError from the lookup falls back to the writer
        # schema.
        read_with =
          begin
            schema_store.find(written_with.fullname)
          rescue StandardError
            written_with
          end

        ::Avro::IO::DatumReader.new(written_with, read_with).read(binary_decoder)
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class SchemaRegistry
  module Schema
    # Raised by Base when a subclass has not overridden a required method.
    class MissingImplementationError < StandardError; end

    # Abstract interface for schema formats. Every method is a class method;
    # all but `dependencies` raise until a subclass overrides them.
    class Base
      # Schema text for a message.
      #
      # @param _message [Object]
      # @param schema_name [String]
      # @return [String]
      # @raise [MissingImplementationError] always, in this base class
      def self.schema_text(_message, schema_name: nil)
        raise MissingImplementationError, 'Subclasses must implement schema_text'
      end

      # Identifier of the schema format.
      #
      # @return [String]
      # @raise [MissingImplementationError] always, in this base class
      def self.schema_type
        raise MissingImplementationError, 'Subclasses must implement schema_type'
      end

      # Encodes a message onto a stream.
      #
      # @param _message [Object]
      # @param _stream [StringIO]
      # @param schema_name [String]
      # @raise [MissingImplementationError] always, in this base class
      def self.encode(_message, _stream, schema_name: nil)
        raise MissingImplementationError, 'Subclasses must implement encode'
      end

      # Decodes a message from a stream.
      #
      # @param _stream [StringIO]
      # @param _schema_text [String]
      # @return [Object]
      # @raise [MissingImplementationError] always, in this base class
      def self.decode(_stream, _schema_text)
        raise MissingImplementationError, 'Subclasses must implement decode'
      end

      # Schemas the message's schema depends on; none by default.
      #
      # @param _message [Object]
      # @return [Hash<String, String>]
      def self.dependencies(_message)
        {}
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'schema_registry_client/schema/base'
|
|
4
|
+
require 'schema_registry_client/output/json_schema'
|
|
5
|
+
|
|
6
|
+
class SchemaRegistry
  module Schema
    # Schema format that describes protobuf messages with JSON Schema and
    # carries their payload as JSON text.
    class ProtoJsonSchema < Base
      # @return [String] the schema type identifier for this format
      def self.schema_type
        'JSON'
      end

      # JSON Schema text generated from the message class's descriptor.
      #
      # @param message [Object] instance whose class exposes `descriptor`
      # @param schema_name [String] unused
      # @return [String]
      def self.schema_text(message, schema_name: nil)
        descriptor_proto = message.class.descriptor.to_proto
        SchemaRegistry::Output::JsonSchema.output(descriptor_proto)
      end

      # Writes the message's hash form, with top-level keys sorted, to
      # `stream` as JSON.
      #
      # @param message [#to_h]
      # @param stream [#write]
      # @param schema_name [String] unused
      def self.encode(message, stream, schema_name: nil)
        ordered = message.to_h.sort.to_h
        stream.write(ordered.to_json)
      end

      # Reads the rest of `stream` and parses it as JSON.
      #
      # @param stream [#read]
      # @param _schema_text [String] unused
      # @return [Object] the parsed JSON value
      def self.decode(stream, _schema_text)
        JSON.parse(stream.read)
      end
    end
  end
end
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'google/protobuf'
|
|
4
|
+
require 'google/protobuf/well_known_types'
|
|
5
|
+
require 'google/protobuf/descriptor_pb'
|
|
6
|
+
require 'schema_registry_client/output/proto_text'
|
|
7
|
+
require 'schema_registry_client/schema/base'
|
|
8
|
+
require 'schema_registry_client/wire'
|
|
9
|
+
|
|
10
|
+
class SchemaRegistry
  module Schema
    # Protobuf implementation of the schema interface. Payloads are prefixed
    # with the list of message indexes used by the Confluent wire format.
    class Protobuf < Base
      class << self
        # @return [String] the schema type identifier for this format
        def schema_type
          'PROTOBUF'
        end

        # .proto text for the file that defines `message`.
        #
        # @param message [Object] a message instance or a
        #   Google::Protobuf::FileDescriptor
        # @param schema_name [String] unused
        # @return [String]
        def schema_text(message, schema_name: nil)
          SchemaRegistry::Output::ProtoText.output(file_descriptor_for(message).to_proto)
        end

        # Writes the message-index prefix followed by the serialized message.
        # The index path [0] (first top-level message) is written as a single 0.
        #
        # @param message [Object] protobuf message instance
        # @param stream [#write]
        # @param schema_name [String] unused
        # @raise [RuntimeError] if the message type cannot be located in its
        #   own file descriptor
        def encode(message, stream, schema_name: nil)
          descriptor = message.class.descriptor
          _, indexes = find_index(descriptor.to_proto,
                                  descriptor.file_descriptor.to_proto.message_type)
          # find_index returns [] when nothing matches, which leaves indexes nil.
          raise "Could not find #{descriptor.name} in #{descriptor.file_descriptor.name}" unless indexes

          if indexes == [0]
            SchemaRegistry::Wire.write_int(stream, 0)
          else
            SchemaRegistry::Wire.write_int(stream, indexes.length)
            indexes.each { |i| SchemaRegistry::Wire.write_int(stream, i) }
          end

          # Now we write the actual message.
          stream.write(message.to_proto)
        end

        # Reads the message-index prefix, then decodes the rest of the stream.
        #
        # @param stream [#read, #readbyte]
        # @param schema_text [String] .proto text of the writer schema
        # @return [Object] decoded protobuf message
        def decode(stream, schema_text)
          # See https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#wire-format
          index_length = SchemaRegistry::Wire.read_int(stream)
          indexes = []
          if index_length.zero?
            indexes.push(0)
          else
            index_length.times do
              indexes.push(SchemaRegistry::Wire.read_int(stream))
            end
          end

          encoded = stream.read
          decode_protobuf(schema_text, encoded, indexes)
        end

        # Rebuilds the cache of loaded file descriptors, keyed by file name.
        # Files under google/protobuf/ are skipped.
        def load_schemas!
          @all_schemas = {}
          all_files = ObjectSpace.each_object(Google::Protobuf::FileDescriptor).to_a
          all_files.each do |file_desc|
            file_path = file_desc.name
            next if file_path.start_with?('google/protobuf/') # skip built-in protos

            @all_schemas[file_path] = file_desc
          end
        end

        # File descriptors imported by the message's file, excluding
        # google/protobuf/ imports.
        #
        # @param message [Object] a message instance or a
        #   Google::Protobuf::FileDescriptor
        # @return [Hash] file name => file descriptor
        # @raise [RuntimeError] if an imported file has not been loaded
        def dependencies(message)
          load_schemas! unless @all_schemas&.any?

          deps = file_descriptor_for(message).to_proto.dependency.to_a
                                             .reject { |d| d.start_with?('google/protobuf/') }
          # The cache may predate files that were loaded later; refresh once
          # before reporting a dependency as missing.
          load_schemas! unless deps.all? { |dep| @all_schemas.key?(dep) }

          deps.to_h do |dep|
            dependency_schema = @all_schemas[dep]
            unless dependency_schema
              raise "Could not find dependency #{dep}. " \
                    'Make sure the corresponding .proto file has been compiled and loaded.'
            end

            [dependency_schema.name, dependency_schema]
          end
        end

        # Depth-first search for `descriptor` among `messages` and their
        # nested types.
        #
        # @param descriptor [Object] descriptor proto to locate
        # @param messages [Enumerable] candidate descriptor protos
        # @param indexes [Array<Integer>] index path accumulated so far
        # @return [Array] `[true, index_path]` when found, `[]` otherwise
        def find_index(descriptor, messages, indexes = [])
          messages.each_with_index do |sub_descriptor, i|
            if sub_descriptor == descriptor
              indexes.push(i)
              return [true, indexes]
            else
              found, found_indexes = find_index(descriptor, sub_descriptor.nested_type, indexes + [i])
              return [true, found_indexes] if found
            end
          end
          []
        end

        # Follows an index path through `messages` and their nested types.
        # The caller's `indexes` array is left untouched.
        #
        # @param indexes [Array<Integer>] index path, outermost first
        # @param messages [Enumerable] top-level descriptor protos
        # @return [Array<String>] message names along the path
        # @raise [RuntimeError] if the path is empty or an index is out of range
        def find_descriptor(indexes, messages)
          raise 'Message index path is empty' if indexes.empty?

          candidates = messages
          indexes.map do |index|
            message = candidates[index]
            raise "Invalid message index #{index} in path #{indexes.inspect}" unless message

            candidates = message.nested_type
            message.name
          end
        end

        # Resolves the message class identified by `indexes` and decodes
        # `encoded` with it.
        #
        # @param schema [String] .proto text of the writer schema
        # @param encoded [String] serialized message bytes
        # @param indexes [Array<Integer>] message index path
        # @return [Object] decoded protobuf message
        # @raise [RuntimeError] if the schema text or index path cannot be
        #   resolved to a loaded message class
        def decode_protobuf(schema, encoded, indexes)
          # get the package; nil when the schema declares none
          package = schema[/package (\S+);/, 1]
          # get the first message in the protobuf text
          # TODO - get the correct message based on schema index
          message_name = schema[/message (\w+) {/, 1]
          raise 'Could not find a message definition in the schema text' unless message_name

          # look up the descriptor
          full_name = [package, message_name].compact.join('.')
          descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(full_name)
          unless descriptor
            msg = "Could not find schema for #{full_name}. " \
                  'Make sure the corresponding .proto file has been compiled and loaded.'
            raise msg
          end

          path = find_descriptor(indexes, descriptor.file_descriptor.to_proto.message_type)
          target_name = [package, *path].compact.join('.')
          correct_message = Google::Protobuf::DescriptorPool.generated_pool.lookup(target_name)
          unless correct_message
            raise "Could not find schema for #{target_name}. " \
                  'Make sure the corresponding .proto file has been compiled and loaded.'
          end

          correct_message.msgclass.decode(encoded)
        end

        private

        # Accepts either a file descriptor or a message instance and returns
        # the file descriptor.
        def file_descriptor_for(message)
          return message if message.is_a?(Google::Protobuf::FileDescriptor)

          message.class.descriptor.file_descriptor
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class SchemaRegistry
  # Variable-length, zig-zag encoded integer I/O.
  module Wire
    # Writes `num` to `stream` as a zig-zag varint: seven payload bits per
    # byte, least-significant group first, high bit set on every byte except
    # the last. Copied from Avro.
    #
    # @param stream [#write]
    # @param num [Integer]
    def self.write_int(stream, num)
      remaining = (num << 1) ^ (num >> 63)
      until (remaining & ~0x7F).zero?
        stream.write(((remaining & 0x7f) | 0x80).chr)
        remaining >>= 7
      end
      stream.write(remaining.chr)
    end

    # Reads one zig-zag varint from `stream`. Copied from Avro.
    #
    # @param stream [#readbyte]
    # @return [Integer]
    def self.read_int(stream)
      byte = stream.readbyte
      accumulated = byte & 0x7F
      offset = 7
      until (byte & 0x80).zero?
        byte = stream.readbyte
        accumulated |= (byte & 0x7F) << offset
        offset += 7
      end
      (accumulated >> 1) ^ -(accumulated & 1)
    end
  end
end
|