avro_turf 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +25 -0
- data/avro_turf.gemspec +4 -1
- data/lib/avro_turf/cached_schema_registry.rb +16 -0
- data/lib/avro_turf/messaging.rb +95 -0
- data/lib/avro_turf/schema_registry.rb +45 -0
- data/lib/avro_turf/version.rb +1 -1
- data/spec/fake_schema_registry_server.rb +25 -0
- data/spec/messaging_spec.rb +52 -0
- data/spec/schema_registry_spec.rb +32 -0
- data/spec/spec_helper.rb +1 -0
- metadata +52 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bdd8c25f6656ec55bcb0054fa42e4a85d5018fa6
|
4
|
+
data.tar.gz: af4d189168531a28fc31793f8f3c65146c70e976
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 039e85f46a0d25766bc24dabe3984e4f1fc6b4c535aaacf53a87d8db8ee308693c5c210eb7c550bcf752de2ab3048cbed06a4bb003e23123b0ea8dfb37c0e648
|
7
|
+
data.tar.gz: d40406ac87ac25fd476b82c14a8f732daf9067d26176aa2138d234264a8bb8c543ed3d55623161c4adb9320311296485297bb93a96459ac240739ceaa659df3f
|
data/README.md
CHANGED
@@ -91,3 +91,28 @@ In the example above, the `person` schema references the `address` schema, even
|
|
91
91
|
There's no reason to copy-paste the `person` schema into the `person_list` schema, as you can reference it directly.
|
92
92
|
|
93
93
|
This feature helps avoid subtle errors when the same type is represented using slightly different schemas.
|
94
|
+
|
95
|
+
|
96
|
+
### Using a Schema Registry
|
97
|
+
|
98
|
+
By default, AvroTurf will encode data in the Avro data file format. This means that the schema used to encode the data is prepended to the output. If you want to decrease the size of the output, e.g. when storing data in a log such as Apache Kafka or in a database, you can use the `AvroTurf::Messaging` API. This top-level API requires the use of [Schema Registry](https://github.com/confluentinc/schema-registry), a service which allows registering and fetching Avro schemas.
|
99
|
+
|
100
|
+
The Messaging API will automatically register schemas used for encoding data, and will fetch the corresponding schema when decoding. Instead of including the full schema in the output, only a schema id generated by the registry is included. Registering the same schema twice is idempotent, so no coordination is needed.
|
101
|
+
|
102
|
+
**NOTE:** The Messaging format is _not_ compatible with the Avro data file API.
|
103
|
+
|
104
|
+
Using the Messaging API is simple once you have set up a Schema Registry service:
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
# You need to pass the URL of your Schema Registry.
|
108
|
+
avro = AvroTurf::Messaging.new(registry_url: "http://my-registry:8081/")
|
109
|
+
|
110
|
+
# The API for encoding and decoding data is similar to the default one. Encoding
|
111
|
+
# data has the side effect of registering the schema. This only happens the first
|
112
|
+
# time a schema is used.
|
113
|
+
data = avro.encode({ "title" => "hello, world" }, schema_name: "greeting")
|
114
|
+
|
115
|
+
# When decoding, the schema will be fetched from the registry and cached. Subsequent
|
116
|
+
# instances of the same schema id will be served by the cache.
|
117
|
+
avro.decode(data) #=> { "title" => "hello, world" }
|
118
|
+
```
|
data/avro_turf.gemspec
CHANGED
@@ -18,9 +18,12 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.require_paths = ["lib"]
|
19
19
|
|
20
20
|
spec.add_dependency "avro", "~> 1.7.7"
|
21
|
+
spec.add_dependency "excon", "~> 0.45.4"
|
21
22
|
|
22
23
|
spec.add_development_dependency "bundler", "~> 1.7"
|
23
24
|
spec.add_development_dependency "rake", "~> 10.0"
|
24
25
|
spec.add_development_dependency "rspec", "~> 3.2.0"
|
25
|
-
spec.add_development_dependency "fakefs"
|
26
|
+
spec.add_development_dependency "fakefs", "~> 0.6.7"
|
27
|
+
spec.add_development_dependency "webmock"
|
28
|
+
spec.add_development_dependency "sinatra"
|
26
29
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
class AvroTurf
  # Caches registrations and lookups to the schema registry in memory.
  #
  # Wraps any object that responds to `fetch(id)` and `register(subject, schema)`
  # and remembers the truthy results, so that repeated calls with the same
  # arguments do not hit the upstream registry again.
  class CachedSchemaRegistry
    # upstream - The registry object that calls are delegated to on a cache miss.
    def initialize(upstream)
      @upstream = upstream
      @schemas_by_id = {}
      @ids_by_schema = {}
    end

    # Looks up a schema by id, consulting the upstream registry only the first
    # time a given id is requested.
    #
    # id - The schema id issued by the registry.
    #
    # Returns whatever the upstream registry returns for the id.
    def fetch(id)
      @schemas_by_id[id] ||= @upstream.fetch(id)
    end

    # Registers a schema under a subject, consulting the upstream registry only
    # the first time a given subject/schema pair is seen.
    #
    # subject - The String subject the schema is registered under.
    # schema  - The schema; its `to_s` representation identifies it in the cache.
    #
    # Returns the schema id issued by the upstream registry.
    def register(subject, schema)
      # A composite key keeps the subject and the schema text apart. Plain
      # string concatenation would let ("ab", "c") and ("a", "bc") share a
      # cache entry.
      key = [subject, schema.to_s]

      @ids_by_schema[key] ||= @upstream.register(subject, schema)
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'avro_turf'
|
3
|
+
require 'avro_turf/schema_store'
|
4
|
+
require 'avro_turf/schema_registry'
|
5
|
+
require 'avro_turf/cached_schema_registry'
|
6
|
+
|
7
|
+
class AvroTurf

  # Provides a way to encode and decode messages without having to embed schemas
  # in the encoded data. Confluent's Schema Registry[1] is used to register
  # a schema when encoding a message -- the registry will issue a schema id that
  # will be included in the encoded data alongside the actual message. When
  # decoding the data, the schema id will be used to look up the writer's schema
  # from the registry.
  #
  # The encoded layout is: one magic byte, the schema id as a 4-byte big-endian
  # integer, then the Avro-encoded message.
  #
  # 1: https://github.com/confluentinc/schema-registry
  class Messaging
    MAGIC_BYTE = [0].pack("C").freeze

    # Instantiate a new Messaging instance with the given configuration.
    #
    # registry_url - The String URL of the schema registry that should be used.
    # schemas_path - The String file system path where local schemas are stored.
    # namespace    - The String default schema namespace.
    # logger       - The Logger that should be used to log information (optional).
    def initialize(registry_url: nil, schemas_path: nil, namespace: nil, logger: nil)
      @logger = logger || Logger.new($stderr)
      @namespace = namespace
      @schema_store = SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
      @registry = CachedSchemaRegistry.new(SchemaRegistry.new(registry_url, logger: @logger))
    end

    # Encodes a message using the specified schema.
    #
    # message     - The message that should be encoded. Must be compatible with
    #               the schema.
    # schema_name - The String name of the schema that should be used to encode
    #               the data.
    # namespace   - The namespace of the schema (optional).
    #
    # Returns the encoded data as a String.
    def encode(message, schema_name: nil, namespace: @namespace)
      schema = @schema_store.find(schema_name, namespace)

      # Schemas are registered under the full name of the top level Avro record
      # type.
      schema_id = @registry.register(schema.fullname, schema)

      stream = StringIO.new
      writer = Avro::IO::DatumWriter.new(schema)
      encoder = Avro::IO::BinaryEncoder.new(stream)

      # Always start with the magic byte.
      encoder.write(MAGIC_BYTE)

      # The schema id is encoded as a 4-byte big-endian integer.
      encoder.write([schema_id].pack("N"))

      # The actual message comes last.
      writer.write(message, encoder)

      stream.string
    end

    # Decodes data into the original message.
    #
    # data        - A String containing encoded data.
    # schema_name - The String name of the schema that should be used to decode
    #               the data. Must match the schema used when encoding (optional).
    # namespace   - The namespace of the schema (optional).
    #
    # Returns the decoded message.
    # Raises RuntimeError if the data does not start with the magic byte or is
    # too short to contain a schema id.
    def decode(data, schema_name: nil, namespace: @namespace)
      readers_schema = schema_name && @schema_store.find(schema_name, namespace)
      stream = StringIO.new(data)
      decoder = Avro::IO::BinaryDecoder.new(stream)

      # The first byte is MAGIC!!!
      magic_byte = decoder.read(1)

      if magic_byte != MAGIC_BYTE
        raise "Expected data to begin with a magic byte, got `#{magic_byte.inspect}`"
      end

      # The schema id is a 4-byte big-endian integer. Reading past the end of
      # the data yields nil or a short string, so check before unpacking rather
      # than failing later with an unrelated error.
      schema_id_bytes = decoder.read(4)

      if schema_id_bytes.nil? || schema_id_bytes.bytesize < 4
        raise "Expected data to contain a 4-byte schema id after the magic byte, got `#{schema_id_bytes.inspect}`"
      end

      schema_id = schema_id_bytes.unpack("N").first

      writers_schema_json = @registry.fetch(schema_id)
      writers_schema = Avro::Schema.parse(writers_schema_json)

      reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
      reader.read(decoder)
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'excon'
|
2
|
+
|
3
|
+
class AvroTurf
  # Thin HTTP client for a schema registry service. Schemas are registered via
  # `POST /subjects/<subject>/versions` and fetched via `GET /schemas/ids/<id>`;
  # both responses are parsed as JSON.
  class SchemaRegistry
    CONTENT_TYPE = "application/vnd.schemaregistry.v1+json".freeze

    # url    - The String base URL of the schema registry.
    # logger - The Logger used to report fetches and registrations.
    def initialize(url, logger: Logger.new($stdout))
      @logger = logger
      @connection = Excon.new(url, headers: {
        "Content-Type" => CONTENT_TYPE,
      })
    end

    # Fetches the schema registered under the given id.
    #
    # id - The schema id issued by the registry.
    #
    # Returns the value of the "schema" key of the registry's JSON response.
    # Raises KeyError if the response has no "schema" key.
    def fetch(id)
      @logger.info "Fetching schema with id #{id}"
      data = get("/schemas/ids/#{id}")
      data.fetch("schema")
    end

    # Registers a schema under the given subject.
    #
    # subject - The String subject name; interpolated into the request path.
    # schema  - The schema to register; sent as its `to_s` representation.
    #
    # Returns the value of the "id" key of the registry's JSON response.
    # Raises KeyError if the response has no "id" key.
    def register(subject, schema)
      # Convert the schema to a string explicitly. Relying on the object's own
      # `to_json` breaks when a JSON extension serializes arbitrary objects as
      # hashes instead of via `to_s`.
      data = post("/subjects/#{subject}/versions", body: {
        schema: schema.to_s
      }.to_json)

      id = data.fetch("id")

      @logger.info "Registered schema for subject `#{subject}`; id = #{id}"

      id
    end

    private

    def get(path, **options)
      request(path, method: :get, **options)
    end

    def post(path, **options)
      request(path, method: :post, **options)
    end

    # Performs the request, expecting a 200 response, and parses the body as JSON.
    def request(path, **options)
      response = @connection.request(path: path, expects: 200, **options)
      JSON.parse(response.body)
    end
  end
end
|
data/lib/avro_turf/version.rb
CHANGED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'sinatra/base'
|
2
|
+
|
3
|
+
# In-memory stand-in for a schema registry service, used by the specs.
# A schema's id is its index in SCHEMAS.
class FakeSchemaRegistryServer < Sinatra::Base
  # Deliberately mutable: holds the schemas registered so far.
  SCHEMAS = []

  # Registers the posted schema and responds with its id.
  post "/subjects/:subject/versions" do
    request.body.rewind
    schema = JSON.parse(request.body.read).fetch("schema")

    # Registration is idempotent: a schema that is already known keeps its id
    # instead of being stored a second time under a new one.
    SCHEMAS << schema unless SCHEMAS.include?(schema)
    schema_id = SCHEMAS.index(schema)

    { id: schema_id }.to_json
  end

  # Responds with the schema stored under the given id.
  get "/schemas/ids/:schema_id" do
    schema = SCHEMAS.at(params[:schema_id].to_i)

    # Unknown ids are reported as an error rather than a 200 response carrying
    # a null schema.
    halt(404, { error_code: 40403, message: "Schema not found" }.to_json) unless schema

    { schema: schema }.to_json
  end

  # Forgets all registered schemas; called between examples.
  def self.clear
    SCHEMAS.clear
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'webmock/rspec'
|
2
|
+
require 'avro_turf/messaging'
|
3
|
+
require_relative 'fake_schema_registry_server'
|
4
|
+
|
5
|
+
# Round-trips messages through AvroTurf::Messaging against the fake registry.
describe AvroTurf::Messaging do
  let(:registry_url) { "http://registry.example.com" }
  let(:logger) { Logger.new(StringIO.new) }

  let(:avro) do
    described_class.new(
      registry_url: registry_url,
      schemas_path: "spec/schemas",
      logger: logger
    )
  end

  # Setup runs in this order: schema directory, stubbed registry, schema file.
  before do
    FileUtils.mkdir_p("spec/schemas")

    # Route every request for the registry URL to the in-process fake server.
    stub_request(:any, /^#{registry_url}/).to_rack(FakeSchemaRegistryServer)
    FakeSchemaRegistryServer.clear

    define_schema "person.avsc", <<-AVSC
      {
        "name": "person",
        "type": "record",
        "fields": [
          {
            "type": "string",
            "name": "full_name"
          }
        ]
      }
    AVSC
  end

  it "encodes and decodes messages" do
    person = { "full_name" => "John Doe" }
    encoded = avro.encode(person, schema_name: "person")

    expect(avro.decode(encoded)).to eq person
  end

  it "allows specifying a reader's schema" do
    person = { "full_name" => "John Doe" }
    encoded = avro.encode(person, schema_name: "person")

    expect(avro.decode(encoded, schema_name: "person")).to eq person
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'webmock/rspec'
|
2
|
+
require 'avro_turf/schema_registry'
|
3
|
+
require_relative 'fake_schema_registry_server'
|
4
|
+
|
5
|
+
# Exercises AvroTurf::SchemaRegistry against the fake registry server.
describe AvroTurf::SchemaRegistry do
  let(:registry_url) { "http://registry.example.com" }

  before do
    # Route every request for the registry URL to the in-process fake server.
    stub_request(:any, /^#{registry_url}/).to_rack(FakeSchemaRegistryServer)
    FakeSchemaRegistryServer.clear
  end

  it "allows registering a schema" do
    quiet_logger = Logger.new(StringIO.new)
    registry = described_class.new(registry_url, logger: quiet_logger)

    schema = <<-JSON
      {
        "type": "record",
        "name": "person",
        "fields": [
          { "name": "name", "type": "string" }
        ]
      }
    JSON

    # Register, then read the schema back through the id that was issued.
    schema_id = registry.register("some-subject", schema)
    round_tripped = registry.fetch(schema_id)

    expect(JSON.parse(round_tripped)).to eq JSON.parse(schema)
  end
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: avro_turf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Schierbeck
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 1.7.7
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: excon
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.45.4
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.45.4
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -68,6 +82,34 @@ dependencies:
|
|
68
82
|
version: 3.2.0
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: fakefs
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.6.7
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.6.7
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: webmock
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: sinatra
|
71
113
|
requirement: !ruby/object:Gem::Requirement
|
72
114
|
requirements:
|
73
115
|
- - ">="
|
@@ -96,6 +138,7 @@ files:
|
|
96
138
|
- avro_turf.gemspec
|
97
139
|
- circle.yml
|
98
140
|
- lib/avro_turf.rb
|
141
|
+
- lib/avro_turf/cached_schema_registry.rb
|
99
142
|
- lib/avro_turf/core_ext.rb
|
100
143
|
- lib/avro_turf/core_ext/date.rb
|
101
144
|
- lib/avro_turf/core_ext/enumerable.rb
|
@@ -107,6 +150,8 @@ files:
|
|
107
150
|
- lib/avro_turf/core_ext/symbol.rb
|
108
151
|
- lib/avro_turf/core_ext/time.rb
|
109
152
|
- lib/avro_turf/core_ext/true_class.rb
|
153
|
+
- lib/avro_turf/messaging.rb
|
154
|
+
- lib/avro_turf/schema_registry.rb
|
110
155
|
- lib/avro_turf/schema_store.rb
|
111
156
|
- lib/avro_turf/version.rb
|
112
157
|
- perf/address.avsc
|
@@ -124,6 +169,9 @@ files:
|
|
124
169
|
- spec/core_ext/symbol_spec.rb
|
125
170
|
- spec/core_ext/time_spec.rb
|
126
171
|
- spec/core_ext/true_class_spec.rb
|
172
|
+
- spec/fake_schema_registry_server.rb
|
173
|
+
- spec/messaging_spec.rb
|
174
|
+
- spec/schema_registry_spec.rb
|
127
175
|
- spec/schema_store_spec.rb
|
128
176
|
- spec/spec_helper.rb
|
129
177
|
homepage: https://github.com/dasch/avro_turf
|
@@ -163,6 +211,9 @@ test_files:
|
|
163
211
|
- spec/core_ext/symbol_spec.rb
|
164
212
|
- spec/core_ext/time_spec.rb
|
165
213
|
- spec/core_ext/true_class_spec.rb
|
214
|
+
- spec/fake_schema_registry_server.rb
|
215
|
+
- spec/messaging_spec.rb
|
216
|
+
- spec/schema_registry_spec.rb
|
166
217
|
- spec/schema_store_spec.rb
|
167
218
|
- spec/spec_helper.rb
|
168
219
|
has_rdoc:
|