schema_registry_client 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/lint.yml +18 -0
  3. data/.github/workflows/release.yml +31 -0
  4. data/.github/workflows/test.yml +22 -0
  5. data/.gitignore +1 -0
  6. data/.rspec +2 -0
  7. data/.rubocop.yml +39 -0
  8. data/CHANGELOG.md +12 -0
  9. data/Gemfile +5 -0
  10. data/Gemfile.lock +150 -0
  11. data/LICENSE +20 -0
  12. data/README.md +48 -0
  13. data/Rakefile +3 -0
  14. data/lib/schema_registry_client/avro_schema_store.rb +127 -0
  15. data/lib/schema_registry_client/cached_confluent_schema_registry.rb +57 -0
  16. data/lib/schema_registry_client/confluent_schema_registry.rb +118 -0
  17. data/lib/schema_registry_client/output/json_schema.rb +78 -0
  18. data/lib/schema_registry_client/output/proto_text.rb +320 -0
  19. data/lib/schema_registry_client/schema/avro.rb +61 -0
  20. data/lib/schema_registry_client/schema/base.rb +44 -0
  21. data/lib/schema_registry_client/schema/proto_json_schema.rb +30 -0
  22. data/lib/schema_registry_client/schema/protobuf.rb +131 -0
  23. data/lib/schema_registry_client/version.rb +5 -0
  24. data/lib/schema_registry_client/wire.rb +30 -0
  25. data/lib/schema_registry_client.rb +156 -0
  26. data/schema_registry_client.gemspec +33 -0
  27. data/spec/decoding_spec.rb +183 -0
  28. data/spec/encoding_spec.rb +207 -0
  29. data/spec/gen/everything/everything_pb.rb +26 -0
  30. data/spec/gen/referenced/referer_pb.rb +24 -0
  31. data/spec/gen/simple/simple_pb.rb +18 -0
  32. data/spec/json_schema_spec.rb +12 -0
  33. data/spec/proto_text_spec.rb +10 -0
  34. data/spec/schemas/everything/everything.json +328 -0
  35. data/spec/schemas/everything/everything.proto +105 -0
  36. data/spec/schemas/referenced/referenced.json +16 -0
  37. data/spec/schemas/referenced/referer.proto +28 -0
  38. data/spec/schemas/referenced/v1/MessageBA.avsc +21 -0
  39. data/spec/schemas/simple/simple.json +12 -0
  40. data/spec/schemas/simple/simple.proto +12 -0
  41. data/spec/schemas/simple/v1/SimpleMessage.avsc +11 -0
  42. data/spec/spec_helper.rb +16 -0
  43. metadata +46 -9
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
require 'logger'
require 'json'
require 'stringio'
require 'schema_registry_client/confluent_schema_registry'
require 'schema_registry_client/cached_confluent_schema_registry'
require 'schema_registry_client/schema/protobuf'
require 'schema_registry_client/schema/proto_json_schema'
require 'schema_registry_client/schema/avro'
10
+
11
# Provides a way to encode and decode messages without having to embed schemas
# in the encoded data. Confluent's Schema Registry[1] is used to register
# a schema when encoding a message -- the registry will issue a schema id that
# will be included in the encoded data alongside the actual message. When
# decoding the data, the schema id will be used to look up the writer's schema
# from the registry.
#
# 1: https://github.com/confluentinc/schema-registry
# https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/serdes-protobuf.html
# https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#wire-format
class SchemaRegistry
  # Raised by #decode when the registry lookup fails with Excon::Error::NotFound.
  class SchemaNotFoundError < StandardError; end

  # Generic schema error. Not raised anywhere in this file.
  class SchemaError < StandardError; end

  # Single zero byte that prefixes every payload written by #encode and that
  # #decode requires at the start of its input.
  MAGIC_BYTE = [0].pack('C').freeze

  # Instantiate a new SchemaRegistry instance with the given configuration.
  #
  # registry             - A schema registry object that responds to all methods in the
  #                        SchemaRegistry::ConfluentSchemaRegistry interface. When given,
  #                        none of the connection options below are used.
  # registry_url         - The String URL of the schema registry that should be used.
  # schema_context       - Schema registry context name (optional).
  # registry_path_prefix - The String URL path prefix used to namespace schema registry requests (optional).
  # logger               - The Logger that should be used to log information (optional, defaults to $stderr).
  # proxy                - Forward the request via proxy (optional).
  # user                 - User for basic auth (optional).
  # password             - Password for basic auth (optional).
  # ssl_ca_file          - Name of file containing CA certificate (optional).
  # client_cert          - Name of file containing client certificate (optional).
  # client_key           - Name of file containing client private key to go with client_cert (optional).
  # client_key_pass      - Password to go with client_key (optional).
  # client_cert_data     - In-memory client certificate (optional).
  # client_key_data      - In-memory client private key to go with client_cert_data (optional).
  # connect_timeout      - Timeout to use in the connection with the schema registry (optional).
  # resolv_resolver      - Custom domain name resolver (optional).
  # schema_type          - A SchemaRegistry::Schema::Base subclass.
  def initialize( # rubocop:disable Metrics/ParameterLists
    registry: nil,
    registry_url: nil,
    schema_context: nil,
    registry_path_prefix: nil,
    logger: nil,
    proxy: nil,
    user: nil,
    password: nil,
    ssl_ca_file: nil,
    client_cert: nil,
    client_key: nil,
    client_key_pass: nil,
    client_cert_data: nil,
    client_key_data: nil,
    connect_timeout: nil,
    resolv_resolver: nil,
    schema_type: SchemaRegistry::Schema::Protobuf
  )
    @logger = logger || Logger.new($stderr)
    # An injected registry wins; otherwise build a caching wrapper around a
    # registry client configured from the connection options.
    @registry = registry || SchemaRegistry::CachedConfluentSchemaRegistry.new(
      SchemaRegistry::ConfluentSchemaRegistry.new(
        registry_url,
        schema_context: schema_context,
        logger: @logger,
        proxy: proxy,
        user: user,
        password: password,
        ssl_ca_file: ssl_ca_file,
        client_cert: client_cert,
        client_key: client_key,
        client_key_pass: client_key_pass,
        client_cert_data: client_cert_data,
        client_key_data: client_key_data,
        path_prefix: registry_path_prefix,
        connect_timeout: connect_timeout,
        resolv_resolver: resolv_resolver
      )
    )
    @schema = schema_type
  end

  class << self
    # Class-level setting; this file only declares the accessor.
    attr_accessor :avro_schema_path
  end

  # Encodes a message using the specified schema.
  # @param message [Object] The message that should be encoded. Must be compatible with the schema.
  # @param subject [String] The subject name the schema should be registered under in the schema registry (optional).
  # @param schema_text [String] schema text to register; when nil it is derived from the message
  #   through the configured schema type (optional).
  # @param schema_name [String] the name of the schema to use for encoding (optional).
  # @return [String] the encoded data: magic byte, 4-byte schema id, then the payload.
  def encode(message, subject: nil, schema_text: nil, schema_name: nil)
    id = register_schema(message, subject, schema_text: schema_text, schema_name: schema_name)

    stream = StringIO.new
    # Always start with the magic byte.
    stream.write(MAGIC_BYTE)

    # The schema id is encoded as a 4-byte big-endian integer.
    stream.write([id].pack('N'))

    @schema.encode(message, stream, schema_name: schema_name)
    stream.string
  end

  # Decodes data into the original message.
  #
  # @param data [String] a string containing encoded data.
  # @return [Object] the decoded data.
  # @raise [RuntimeError] if the data does not start with the magic byte or is
  #   too short to contain the 4-byte schema id.
  # @raise [SchemaNotFoundError] if the lookup raises Excon::Error::NotFound.
  def decode(data)
    stream = StringIO.new(data)

    # The first byte must be the magic byte.
    magic_byte = stream.read(1)
    raise "Expected data to begin with a magic byte, got `#{magic_byte.inspect}`" if magic_byte != MAGIC_BYTE

    # The schema id is a 4-byte big-endian integer. A short read (nil at EOF or
    # fewer than 4 bytes) would otherwise surface as NoMethodError or as a nil
    # schema id handed to the registry, so reject it explicitly.
    id_bytes = stream.read(4)
    if id_bytes.nil? || id_bytes.bytesize < 4
      raise "Expected a 4-byte schema id after the magic byte, got `#{id_bytes.inspect}`"
    end

    schema_id = id_bytes.unpack1('N')
    schema = @registry.fetch(schema_id)
    @schema.decode(stream, schema)
  rescue Excon::Error::NotFound
    raise SchemaNotFoundError, "Schema with id: #{schema_id} is not found on registry"
  end

  private

  # Registers the schema for +message+ under +subject+, registering every
  # dependency reported by the schema type first so it can be referenced by
  # version.
  #
  # @return [Object, nil] whatever @registry.register returns, or nil when the
  #   registry reports the schema as already registered.
  def register_schema(message, subject, schema_text: nil, schema_name: nil)
    schema_text ||= @schema.schema_text(message, schema_name: schema_name)
    # NOTE(review): this returns nil when registered? is truthy, yet #encode packs
    # the return value as the schema id -- confirm how the registry object's
    # registered?/register interact so that a nil id can never reach #encode.
    return if @registry.registered?(schema_text, subject)

    # register dependencies first
    dependencies = @schema.dependencies(message)
    versions = dependencies.map do |name, dependency|
      dependency_id = register_schema(dependency, name)
      @registry.fetch_version(dependency_id, name)
    end

    # Each dependency is referenced under its own name as both name and subject.
    references = dependencies.keys.zip(versions).map do |name, version|
      { name: name, subject: name, version: version }
    end

    @registry.register(subject,
                       schema_text,
                       references: references,
                       schema_type: @schema.schema_type)
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
# Put lib/ on the load path so the version constant can be read below.
lib_dir = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'schema_registry_client/version'

Gem::Specification.new do |gem|
  # Identity and ownership.
  gem.name = 'schema_registry_client'
  gem.version = SchemaRegistry::VERSION
  gem.authors = ['Daniel Orner']
  gem.email = ['daniel.orner@flipp.com']
  gem.summary = 'Confluent Schema Registry client with support for Avro and Protobuf'
  gem.homepage = 'https://github.com/flipp-oss/schema_registry_client'
  gem.license = 'MIT'
  gem.required_ruby_version = '>= 3.0'

  gem.metadata['rubygems_mfa_required'] = 'true'

  # Package every file tracked by git; anything under bin/ becomes an executable.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files
  gem.executables = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.require_paths = ['lib']

  # Runtime dependencies (no version constraints).
  %w[avro excon google-protobuf].each do |runtime_gem|
    gem.add_dependency runtime_gem
  end

  # Development dependencies, in declaration order, with their constraints.
  [
    ['bundler', '~> 2.0'],
    ['rake', '~> 13.0'],
    ['rspec', '~> 3.2'],
    ['simplecov'],
    ['standardrb'],
    ['webmock']
  ].each do |dev_gem, *requirements|
    gem.add_development_dependency dev_gem, *requirements
  end
end
@@ -0,0 +1,183 @@
1
+ # frozen_string_literal: true
2
+
3
# Exercises SchemaRegistry#decode for each schema type against a registry
# stubbed at http://localhost:8081.
RSpec.describe 'decoding' do
  let(:schema_registry_client) do
    SchemaRegistry.new(
      registry_url: 'http://localhost:8081'
    )
  end

  # Stubs the schema-by-id endpoint and returns the stub so examples can assert
  # how many times it was requested.
  def stub_schema_fetch(schema_id, schema_text)
    stub_request(:get, "http://localhost:8081/schemas/ids/#{schema_id}")
      .to_return_json(body: { schema: schema_text })
  end

  # Reads a schema fixture stored under the schemas directory next to this spec.
  def schema_fixture(relative_path)
    File.read("#{__dir__}/schemas/#{relative_path}")
  end

  it 'should decode a simple message' do
    stub = stub_schema_fetch(15, schema_fixture('simple/simple.proto'))
    msg = Simple::V1::SimpleMessage.new(name: 'my name')
    encoded = "\u0000\u0000\u0000\u0000\u000F\u0000#{msg.to_proto}"
    expect(schema_registry_client.decode(encoded)).to eq(msg)

    # if we do it again we should not see any more requests
    expect(schema_registry_client.decode(encoded)).to eq(msg)

    expect(stub).to have_been_requested.once
  end

  it 'should decode a complex message' do
    stub = stub_schema_fetch(20, schema_fixture('referenced/referer.proto'))
    msg = Referenced::V1::MessageB::MessageBA.new(
      simple: Simple::V1::SimpleMessage.new(name: 'my name')
    )
    encoded = "\u0000\u0000\u0000\u0000\u0014\u0004\u0002\u0000#{msg.to_proto}"
    expect(schema_registry_client.decode(encoded)).to eq(msg)

    # if we do it again we should not see any more requests
    expect(schema_registry_client.decode(encoded)).to eq(msg)
    expect(stub).to have_been_requested.once
  end

  describe 'with JSON' do
    let(:schema_registry_client) do
      SchemaRegistry.new(
        registry_url: 'http://localhost:8081',
        schema_type: SchemaRegistry::Schema::ProtoJsonSchema
      )
    end

    it 'should decode a simple message' do
      stub = stub_schema_fetch(15, schema_fixture('simple/simple.json'))
      encoded = "\u0000\u0000\u0000\u0000\u000F{\"name\":\"my name\"}"
      msg = { 'name' => 'my name' }
      expect(schema_registry_client.decode(encoded)).to eq(msg)

      # if we do it again we should not see any more requests
      expect(schema_registry_client.decode(encoded)).to eq(msg)

      expect(stub).to have_been_requested.once
    end
  end

  describe 'with Avro' do
    let(:schema_registry_client) do
      SchemaRegistry.avro_schema_path = "#{__dir__}/schemas"
      SchemaRegistry.new(
        registry_url: 'http://localhost:8081',
        schema_type: SchemaRegistry::Schema::Avro
      )
    end

    # Reset the class-level setting so it does not leak into other examples.
    after do
      SchemaRegistry.avro_schema_path = nil
    end

    it 'should decode a simple message' do
      stub = stub_schema_fetch(15, schema_fixture('simple/v1/SimpleMessage.avsc'))

      # Avro-encoded data: "my name" as string (length 0x0E + bytes)
      encoded = "\u0000\u0000\u0000\u0000\u000F\u000Emy name"
      decoded = schema_registry_client.decode(encoded)

      expect(decoded).to eq({ 'name' => 'my name' })

      # if we do it again we should not see any more requests
      expect(schema_registry_client.decode(encoded)).to eq(decoded)

      expect(stub).to have_been_requested.once
    end

    it 'should decode a complex message with nested record' do
      stub = stub_schema_fetch(20, schema_fixture('referenced/v1/MessageBA.avsc'))

      # Avro-encoded nested record
      encoded = "\u0000\u0000\u0000\u0000\u0014\u000Emy name"
      decoded = schema_registry_client.decode(encoded)

      expect(decoded).to eq({
                              'simple' => {
                                'name' => 'my name'
                              }
                            })

      # if we do it again we should not see any more requests
      expect(schema_registry_client.decode(encoded)).to eq(decoded)
      expect(stub).to have_been_requested.once
    end

    it 'should decode a message with multiple fields' do
      multi_schema = {
        'type' => 'record',
        'name' => 'MultiFieldMessage',
        'namespace' => 'test.v1',
        'fields' => [
          { 'name' => 'name', 'type' => 'string' },
          { 'name' => 'age', 'type' => 'int' }
        ]
      }
      stub = stub_schema_fetch(25, JSON.generate(multi_schema))

      # Manually encode the message for testing
      # Alice = 0x0A (length 5*2) + "Alice" bytes
      # age 30 = zigzag encoded as 60 (0x3C)
      encoded = "\u0000\u0000\u0000\u0000\u0019\u000AAlice\u003C"
      decoded = schema_registry_client.decode(encoded)

      expect(decoded).to eq({ 'name' => 'Alice', 'age' => 30 })

      expect(stub).to have_been_requested.once
    end

    it 'should handle schema evolution with reader schema' do
      # Writer schema (what was used to encode) - has an additional field with default
      writer_schema = {
        'type' => 'record',
        'name' => 'SimpleMessage',
        'namespace' => 'simple.v1',
        'fields' => [
          { 'name' => 'name', 'type' => 'string' },
          { 'name' => 'age', 'type' => 'int', 'default' => 0 }
        ]
      }

      # Reader schema (what we have locally) - doesn't have the age field
      # This simulates reading old data with a newer schema or vice versa
      stub = stub_schema_fetch(15, JSON.generate(writer_schema))

      # Encoded with writer schema: "my name" (0x0E + bytes) + age 25 (zigzag encoded as 50 = 0x32)
      encoded = "\u0000\u0000\u0000\u0000\u000F\u000Emy name\u0032"
      decoded = schema_registry_client.decode(encoded)

      # Decoded value should only have 'name' from reader schema, age is ignored
      expect(decoded).to eq({ 'name' => 'my name' })
      expect(stub).to have_been_requested.once
    end

    it 'should raise error for invalid magic byte' do
      # Wrong magic byte (0x01 instead of 0x00)
      encoded = "\u0001\u0000\u0000\u0000\u000F\u000Emy name"

      expect do
        schema_registry_client.decode(encoded)
      end.to raise_error(/Expected data to begin with a magic byte/)
    end

    it 'should raise error for unknown schema id' do
      # Schema ID 999 is not stubbed, so decoding should fail.
      # The header is packed as raw bytes: a "\u00E7" escape would be UTF-8
      # encoded as two bytes (0xC3 0xA7), which shifts the id away from 999.
      header = [0, 999].pack('CN')
      encoded = "#{header}\u000Emy name"

      # NOTE(review): this pattern matches any message containing "schema", so it
      # presumably also accepts the unstubbed-request error raised by the HTTP
      # stubbing layer -- confirm which error this example is meant to pin.
      expect do
        schema_registry_client.decode(encoded)
      end.to raise_error(/Schema|not found/i)
    end
  end
end
@@ -0,0 +1,207 @@
1
+ # frozen_string_literal: true
2
+
3
# FileUtils is used below to manage a temporary fixture directory; require it
# here rather than relying on another file having loaded it.
require 'fileutils'

# Exercises SchemaRegistry#encode for each schema type against a registry
# stubbed at http://localhost:8081.
RSpec.describe 'encoding' do
  let(:schema_registry_client) do
    SchemaRegistry.new(
      registry_url: 'http://localhost:8081'
    )
  end

  it 'should encode a simple message' do
    schema = File.read("#{__dir__}/schemas/simple/simple.proto")
    stub = stub_request(:post, 'http://localhost:8081/subjects/simple/versions')
           .with(body: { 'schemaType' => 'PROTOBUF',
                         'references' => [],
                         'schema' => schema }).to_return_json(body: { id: 15 })
    msg = Simple::V1::SimpleMessage.new(name: 'my name')
    encoded = schema_registry_client.encode(msg, subject: 'simple')
    expect(encoded).to eq("\u0000\u0000\u0000\u0000\u000F\u0000#{msg.to_proto}")

    # if we do it again we should not see any more requests
    encoded2 = schema_registry_client.encode(msg, subject: 'simple')
    expect(encoded2).to eq(encoded)

    expect(stub).to have_been_requested.once
  end

  it 'should encode a complex message' do
    schema = File.read("#{__dir__}/schemas/referenced/referer.proto")
    dep_schema = File.read("#{__dir__}/schemas/simple/simple.proto")
    # The imported schema is registered under its own file name as the subject.
    dep_stub = stub_request(:post, 'http://localhost:8081/subjects/simple%2Fsimple.proto/versions')
               .with(body: { 'schemaType' => 'PROTOBUF',
                             'references' => [],
                             'schema' => dep_schema }).to_return_json(body: { id: 15 })
    version_stub = stub_request(:get, 'http://localhost:8081/schemas/ids/15/versions')
                   .to_return_json(body: [{ version: 1, subject: 'simple/simple.proto' }])
    stub = stub_request(:post, 'http://localhost:8081/subjects/referenced/versions')
           .with(body: { 'schemaType' => 'PROTOBUF',
                         'references' => [
                           {
                             name: 'simple/simple.proto',
                             subject: 'simple/simple.proto',
                             version: 1
                           }
                         ],
                         'schema' => schema }).to_return_json(body: { id: 20 })
    msg = Referenced::V1::MessageB::MessageBA.new(
      simple: Simple::V1::SimpleMessage.new(name: 'my name')
    )
    encoded = schema_registry_client.encode(msg, subject: 'referenced')
    expect(encoded).to eq("\u0000\u0000\u0000\u0000\u0014\u0004\u0002\u0000#{msg.to_proto}")

    # if we do it again we should not see any more requests
    encoded2 = schema_registry_client.encode(msg, subject: 'referenced')
    expect(encoded2).to eq(encoded)
    expect(stub).to have_been_requested.once
    expect(dep_stub).to have_been_requested.once
    expect(version_stub).to have_been_requested.once
  end

  describe 'with JSON' do
    let(:schema_registry_client) do
      SchemaRegistry.new(
        registry_url: 'http://localhost:8081',
        schema_type: SchemaRegistry::Schema::ProtoJsonSchema
      )
    end

    it 'should encode a simple message' do
      schema = File.read("#{__dir__}/schemas/simple/simple.json").strip
      stub = stub_request(:post, 'http://localhost:8081/subjects/simple/versions')
             .with(body: { 'schemaType' => 'JSON',
                           'references' => [],
                           'schema' => schema }).to_return_json(body: { id: 15 })
      msg = Simple::V1::SimpleMessage.new(name: 'my name')
      encoded = schema_registry_client.encode(msg, subject: 'simple')
      expect(encoded).to eq("\u0000\u0000\u0000\u0000\u000F{\"name\":\"my name\"}")

      # if we do it again we should not see any more requests
      encoded2 = schema_registry_client.encode(msg, subject: 'simple')
      expect(encoded2).to eq(encoded)

      expect(stub).to have_been_requested.once
    end

    it 'should encode a complex message' do
      schema = File.read("#{__dir__}/schemas/referenced/referenced.json").strip
      stub = stub_request(:post, 'http://localhost:8081/subjects/referenced/versions')
             .with(body: { 'schemaType' => 'JSON',
                           'references' => [],
                           'schema' => schema }).to_return_json(body: { id: 20 })
      msg = Referenced::V1::MessageB::MessageBA.new(
        simple: Simple::V1::SimpleMessage.new(name: 'my name')
      )
      encoded = schema_registry_client.encode(msg, subject: 'referenced')
      expect(encoded).to eq("\u0000\u0000\u0000\u0000\u0014{\"simple\":{\"name\":\"my name\"}}")

      # if we do it again we should not see any more requests
      encoded2 = schema_registry_client.encode(msg, subject: 'referenced')
      expect(encoded2).to eq(encoded)
      expect(stub).to have_been_requested.once
    end
  end

  describe 'with Avro' do
    let(:schema_registry_client) do
      SchemaRegistry.avro_schema_path = "#{__dir__}/schemas"
      SchemaRegistry.new(
        registry_url: 'http://localhost:8081',
        schema_type: SchemaRegistry::Schema::Avro
      )
    end

    # Reset the class-level setting so it does not leak into other examples.
    after do
      SchemaRegistry.avro_schema_path = nil
    end

    it 'should encode a simple message' do
      schema = File.read("#{__dir__}/schemas/simple/v1/SimpleMessage.avsc")
      stub = stub_request(:post, 'http://localhost:8081/subjects/simple/versions')
             .with(body: { 'schemaType' => 'AVRO',
                           'references' => [],
                           'schema' => schema }).to_return_json(body: { id: 15 })
      msg = { 'name' => 'my name' }
      encoded = schema_registry_client.encode(msg, subject: 'simple', schema_name: 'simple.v1.SimpleMessage')
      # Avro encoding: magic byte (0x00) + schema id (4 bytes, big-endian) + Avro binary data
      # "my name" encoded as Avro string: length (0x0E = 14) + "my name" bytes
      expect(encoded).to eq("\u0000\u0000\u0000\u0000\u000F\u000Emy name")

      # if we do it again we should not see any more requests
      encoded2 = schema_registry_client.encode(msg, subject: 'simple', schema_name: 'simple.v1.SimpleMessage')
      expect(encoded2).to eq(encoded)

      expect(stub).to have_been_requested.once
    end

    it 'should encode a complex message with nested record' do
      schema = File.read("#{__dir__}/schemas/referenced/v1/MessageBA.avsc")
      stub = stub_request(:post, 'http://localhost:8081/subjects/referenced/versions')
             .with(body: { 'schemaType' => 'AVRO',
                           'references' => [],
                           'schema' => schema }).to_return_json(body: { id: 20 })
      msg = {
        'simple' => {
          'name' => 'my name'
        }
      }
      encoded = schema_registry_client.encode(msg, subject: 'referenced', schema_name: 'referenced.v1.MessageBA')
      # Avro encoding: magic byte + schema id + Avro binary for nested record
      expect(encoded).to eq("\u0000\u0000\u0000\u0000\u0014\u000Emy name")

      # if we do it again we should not see any more requests
      encoded2 = schema_registry_client.encode(msg, subject: 'referenced', schema_name: 'referenced.v1.MessageBA')
      expect(encoded2).to eq(encoded)
      expect(stub).to have_been_requested.once
    end

    it 'should handle multiple fields' do
      # Create a temporary schema file for testing
      multi_schema_path = "#{__dir__}/schemas/test/v1"
      FileUtils.mkdir_p(multi_schema_path)

      multi_schema = {
        'type' => 'record',
        'name' => 'MultiFieldMessage',
        'namespace' => 'test.v1',
        'fields' => [
          { 'name' => 'name', 'type' => 'string' },
          { 'name' => 'age', 'type' => 'int' }
        ]
      }
      schema_json = JSON.pretty_generate(multi_schema)
      File.write("#{multi_schema_path}/MultiFieldMessage.avsc", schema_json)

      stub = stub_request(:post, 'http://localhost:8081/subjects/multi/versions')
             .with(body: { 'schemaType' => 'AVRO',
                           'references' => [],
                           'schema' => schema_json }).to_return_json(body: { id: 25 })

      msg = { 'name' => 'Alice', 'age' => 30 }
      encoded = schema_registry_client.encode(msg, subject: 'multi', schema_name: 'test.v1.MultiFieldMessage')

      # Verify encoding starts with magic byte and schema id
      expect(encoded[0]).to eq("\u0000")
      expect(encoded[1..4].unpack1('N')).to eq(25)

      expect(stub).to have_been_requested.once
    ensure
      # Clean up in ensure so a failed expectation above does not leave the
      # generated fixture directory behind in the source tree.
      FileUtils.rm_rf("#{__dir__}/schemas/test")
    end

    it 'should validate schema before encoding' do
      schema = File.read("#{__dir__}/schemas/simple/v1/SimpleMessage.avsc")
      stub_request(:post, 'http://localhost:8081/subjects/simple/versions')
        .with(body: { 'schemaType' => 'AVRO',
                      'references' => [],
                      'schema' => schema }).to_return_json(body: { id: 15 })

      # Invalid message - missing required field
      msg = { 'invalid_field' => 'value' }

      expect do
        schema_registry_client.encode(msg, subject: 'simple', schema_name: 'simple.v1.SimpleMessage')
      end.to raise_error(Avro::SchemaValidator::ValidationError)
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
4
+ # source: everything/everything.proto
5
+
6
+ require 'google/protobuf'
7
+
8
+ require 'simple/simple_pb'
9
+ require 'google/protobuf/descriptor_pb'
10
+
11
+ descriptor_data = "\n\x1b\x65verything/everything.proto\x12\reverything.v1\x1a\x13simple/simple.proto\x1a google/protobuf/descriptor.proto\"*\n\x0e\x46oreignMessage\x12\n\n\x02id\x18\x01 \x01(\x05\x12\x0c\n\x04name\x18\x02 \x01(\t\"\xc0\x12\n\x0cTestAllTypes\x12\x16\n\x0eoptional_int32\x18\x01 \x01(\x05\x12\x16\n\x0eoptional_int64\x18\x02 \x01(\x03\x12\x17\n\x0foptional_uint32\x18\x03 \x01(\r\x12\x17\n\x0foptional_uint64\x18\x04 \x01(\x04\x12\x17\n\x0foptional_sint32\x18\x05 \x01(\x11\x12\x17\n\x0foptional_sint64\x18\x06 \x01(\x12\x12\x18\n\x10optional_fixed32\x18\x07 \x01(\x07\x12\x18\n\x10optional_fixed64\x18\x08 \x01(\x06\x12\x19\n\x11optional_sfixed32\x18\t \x01(\x0f\x12\x19\n\x11optional_sfixed64\x18\n \x01(\x10\x12\x16\n\x0eoptional_float\x18\x0b \x01(\x02\x12\x17\n\x0foptional_double\x18\x0c \x01(\x01\x12\x15\n\roptional_bool\x18\r \x01(\x08\x12\x17\n\x0foptional_string\x18\x0e \x01(\t\x12\x16\n\x0eoptional_bytes\x18\x0f \x01(\x0c\x12J\n\x17optional_nested_message\x18\x12 \x01(\x0b\x32).everything.v1.TestAllTypes.NestedMessage\x12?\n\x18optional_foreign_message\x18\x13 \x01(\x0b\x32\x1d.everything.v1.ForeignMessage\x12\x39\n\x17optional_import_message\x18\x14 \x01(\x0b\x32\x18.simple.v1.SimpleMessage\x12\x44\n\x14optional_nested_enum\x18\x15 \x01(\x0e\x32&.everything.v1.TestAllTypes.NestedEnum\x12\x39\n\x15optional_foreign_enum\x18\x16 \x01(\x0e\x32\x1a.everything.v1.ForeignEnum\x12\x33\n\x14optional_import_enum\x18\x17 \x01(\x0e\x32\x15.simple.v1.SimpleEnum\x12!\n\x15optional_string_piece\x18\x18 \x01(\tB\x02\x08\x02\x12\x19\n\roptional_cord\x18\x19 \x01(\tB\x02\x08\x01\x12\x1f\n\x13optional_bytes_cord\x18V \x01(\x0c\x42\x02\x08\x01\x12L\n\x15optional_lazy_message\x18\x1b \x01(\x0b\x32).everything.v1.TestAllTypes.NestedMessageB\x02(\x01\x12W\n optional_unverified_lazy_message\x18\x1c \x01(\x0b\x32).everything.v1.TestAllTypes.NestedMessageB\x02x\x01\x12\x16\n\x0erepeated_int32\x18\x1f \x03(\x05\x12\x16\n\x0erepeated_int64\x18 
\x03(\x03\x12\x17\n\x0frepeated_uint32\x18! \x03(\r\x12\x17\n\x0frepeated_uint64\x18\" \x03(\x04\x12\x17\n\x0frepeated_sint32\x18# \x03(\x11\x12\x17\n\x0frepeated_sint64\x18$ \x03(\x12\x12\x18\n\x10repeated_fixed32\x18% \x03(\x07\x12\x18\n\x10repeated_fixed64\x18& \x03(\x06\x12\x19\n\x11repeated_sfixed32\x18' \x03(\x0f\x12\x19\n\x11repeated_sfixed64\x18( \x03(\x10\x12\x16\n\x0erepeated_float\x18) \x03(\x02\x12\x17\n\x0frepeated_double\x18* \x03(\x01\x12\x15\n\rrepeated_bool\x18+ \x03(\x08\x12\x17\n\x0frepeated_string\x18, \x03(\t\x12\x16\n\x0erepeated_bytes\x18- \x03(\x0c\x12J\n\x17repeated_nested_message\x18\x30 \x03(\x0b\x32).everything.v1.TestAllTypes.NestedMessage\x12?\n\x18repeated_foreign_message\x18\x31 \x03(\x0b\x32\x1d.everything.v1.ForeignMessage\x12\x39\n\x17repeated_import_message\x18\x32 \x03(\x0b\x32\x18.simple.v1.SimpleMessage\x12\x44\n\x14repeated_nested_enum\x18\x33 \x03(\x0e\x32&.everything.v1.TestAllTypes.NestedEnum\x12\x39\n\x15repeated_foreign_enum\x18\x34 \x03(\x0e\x32\x1a.everything.v1.ForeignEnum\x12\x33\n\x14repeated_import_enum\x18\x35 \x03(\x0e\x32\x15.simple.v1.SimpleEnum\x12!\n\x15repeated_string_piece\x18\x36 \x03(\tB\x02\x08\x02\x12\x19\n\rrepeated_cord\x18\x37 \x03(\tB\x02\x08\x01\x12L\n\x15repeated_lazy_message\x18\x39 \x03(\x0b\x32).everything.v1.TestAllTypes.NestedMessageB\x02(\x01\x12\x16\n\x0coneof_uint32\x18o \x01(\rH\x00\x12I\n\x14oneof_nested_message\x18p \x01(\x0b\x32).everything.v1.TestAllTypes.NestedMessageH\x00\x12\x16\n\x0coneof_string\x18q \x01(\tH\x00\x12\x15\n\x0boneof_bytes\x18r \x01(\x0cH\x00\x12\x18\n\noneof_cord\x18s \x01(\tB\x02\x08\x01H\x00\x12 \n\x12oneof_string_piece\x18t \x01(\tB\x02\x08\x02H\x00\x12R\n\x19oneof_lazy_nested_message\x18u \x01(\x0b\x32).everything.v1.TestAllTypes.NestedMessageB\x02(\x01H\x00\x1a\"\n\rNestedMessage\x12\x11\n\x02\x62\x62\x18\x01 \x01(\x05\x42\x05\x90\x82\x19\xd2\t\x1a\x1a\n\rOptionalGroup\x12\t\n\x01\x61\x18\x11 \x01(\x05\x1a\x1a\n\rRepeatedGroup\x12\t\n\x01\x61\x18/ 
\x01(\x05\"'\n\nNestedEnum\x12\x07\n\x03\x46OO\x10\x00\x12\x07\n\x03\x42\x41R\x10\x02\x12\x07\n\x03\x42\x41Z\x10\x03\x42\r\n\x0boneof_field*/\n\x0b\x46oreignEnum\x12\x0f\n\x0b\x46OREIGN_FOO\x10\x00\x12\x0f\n\x0b\x46OREIGN_BAR\x10\x01\x32Y\n\x0bTestService\x12J\n\nTestMethod\x12\x1b.everything.v1.TestAllTypes\x1a\x1d.everything.v1.ForeignMessage\"\x00:4\n\x0bsome_option\x12\x1d.google.protobuf.FieldOptions\x18\xa2\x90\x03 \x01(\x05\x62\x06proto3"
12
+
13
+ pool = Google::Protobuf::DescriptorPool.generated_pool
14
+ pool.add_serialized_file(descriptor_data)
15
+
16
+ module Everything
17
+ module V1
18
+ ForeignMessage = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('everything.v1.ForeignMessage').msgclass
19
+ TestAllTypes = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('everything.v1.TestAllTypes').msgclass
20
+ TestAllTypes::NestedMessage = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('everything.v1.TestAllTypes.NestedMessage').msgclass
21
+ TestAllTypes::OptionalGroup = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('everything.v1.TestAllTypes.OptionalGroup').msgclass
22
+ TestAllTypes::RepeatedGroup = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('everything.v1.TestAllTypes.RepeatedGroup').msgclass
23
+ TestAllTypes::NestedEnum = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('everything.v1.TestAllTypes.NestedEnum').enummodule
24
+ ForeignEnum = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('everything.v1.ForeignEnum').enummodule
25
+ end
26
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
4
+ # source: referenced/referer.proto
5
+
6
+ require 'google/protobuf'
7
+
8
+ require 'simple/simple_pb'
9
+
10
+ descriptor_data = "\n\x18referenced/referer.proto\x12\rreferenced.v1\x1a\x13simple/simple.proto\"j\n\x08MessageA\x1a\x43\n\tMessageAA\x12\x0c\n\x04name\x18\x01 \x01(\t\x12(\n\x06simple\x18\x02 \x01(\x0b\x32\x18.simple.v1.SimpleMessage\x1a\x19\n\tMessageAB\x12\x0c\n\x04name\x18\x01 \x01(\t\"j\n\x08MessageB\x1a\x43\n\tMessageBA\x12\x0c\n\x04name\x18\x01 \x01(\t\x12(\n\x06simple\x18\x02 \x01(\x0b\x32\x18.simple.v1.SimpleMessage\x1a\x19\n\tMessageBB\x12\x0c\n\x04name\x18\x01 \x01(\tb\x06proto3"
11
+
12
+ pool = Google::Protobuf::DescriptorPool.generated_pool
13
+ pool.add_serialized_file(descriptor_data)
14
+
15
+ module Referenced
16
+ module V1
17
+ MessageA = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('referenced.v1.MessageA').msgclass
18
+ MessageA::MessageAA = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('referenced.v1.MessageA.MessageAA').msgclass
19
+ MessageA::MessageAB = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('referenced.v1.MessageA.MessageAB').msgclass
20
+ MessageB = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('referenced.v1.MessageB').msgclass
21
+ MessageB::MessageBA = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('referenced.v1.MessageB.MessageBA').msgclass
22
+ MessageB::MessageBB = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('referenced.v1.MessageB.MessageBB').msgclass
23
+ end
24
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
4
+ # source: simple/simple.proto
5
+
6
+ require 'google/protobuf'
7
+
8
+ descriptor_data = "\n\x13simple/simple.proto\x12\tsimple.v1\"\x1d\n\rSimpleMessage\x12\x0c\n\x04name\x18\x01 \x01(\t*,\n\nSimpleEnum\x12\x0e\n\nSIMPLE_FOO\x10\x00\x12\x0e\n\nSIMPLE_BAR\x10\x01\x62\x06proto3"
9
+
10
+ pool = Google::Protobuf::DescriptorPool.generated_pool
11
+ pool.add_serialized_file(descriptor_data)
12
+
13
+ module Simple
14
+ module V1
15
+ SimpleMessage = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('simple.v1.SimpleMessage').msgclass
16
+ SimpleEnum = ::Google::Protobuf::DescriptorPool.generated_pool.lookup('simple.v1.SimpleEnum').enummodule
17
+ end
18
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'schema_registry_client/output/json_schema'
4
+
5
# Compares the JSON schema generated for the TestAllTypes descriptor with the
# checked-in everything.json fixture.
RSpec.describe SchemaRegistry::Output::JsonSchema do
  it 'should output as expected' do
    descriptor_proto = Everything::V1::TestAllTypes.descriptor.to_proto
    generated = described_class.output(descriptor_proto)

    # A newline is appended to the generated text before comparing it with the file.
    fixture = File.read("#{__dir__}/schemas/everything/everything.json")
    expect("#{generated}\n").to eq(fixture)
  end
end