avro_turf_enchanced 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +174 -0
- data/Rakefile +2 -0
- data/avro_turf.gemspec +30 -0
- data/circle.yml +4 -0
- data/lib/avro_turf.rb +105 -0
- data/lib/avro_turf/cached_schema_registry.rb +26 -0
- data/lib/avro_turf/core_ext.rb +10 -0
- data/lib/avro_turf/core_ext/date.rb +5 -0
- data/lib/avro_turf/core_ext/enumerable.rb +5 -0
- data/lib/avro_turf/core_ext/false_class.rb +5 -0
- data/lib/avro_turf/core_ext/hash.rb +7 -0
- data/lib/avro_turf/core_ext/nil_class.rb +5 -0
- data/lib/avro_turf/core_ext/numeric.rb +5 -0
- data/lib/avro_turf/core_ext/string.rb +5 -0
- data/lib/avro_turf/core_ext/symbol.rb +5 -0
- data/lib/avro_turf/core_ext/time.rb +5 -0
- data/lib/avro_turf/core_ext/true_class.rb +5 -0
- data/lib/avro_turf/messaging.rb +102 -0
- data/lib/avro_turf/schema_registry.rb +79 -0
- data/lib/avro_turf/schema_store.rb +58 -0
- data/lib/avro_turf/schema_to_avro_patch.rb +52 -0
- data/lib/avro_turf/test/fake_schema_registry_server.rb +84 -0
- data/lib/avro_turf/version.rb +3 -0
- data/perf/address.avsc +14 -0
- data/perf/encoding_size.rb +26 -0
- data/perf/encoding_speed.rb +30 -0
- data/perf/person.avsc +14 -0
- data/spec/avro_turf_spec.rb +161 -0
- data/spec/cached_schema_registry_spec.rb +41 -0
- data/spec/core_ext/date_spec.rb +6 -0
- data/spec/core_ext/enumerable_spec.rb +12 -0
- data/spec/core_ext/false_class_spec.rb +5 -0
- data/spec/core_ext/hash_spec.rb +8 -0
- data/spec/core_ext/nil_class_spec.rb +5 -0
- data/spec/core_ext/numeric_spec.rb +6 -0
- data/spec/core_ext/string_spec.rb +5 -0
- data/spec/core_ext/symbol_spec.rb +5 -0
- data/spec/core_ext/time_spec.rb +6 -0
- data/spec/core_ext/true_class_spec.rb +5 -0
- data/spec/messaging_spec.rb +112 -0
- data/spec/schema_registry_spec.rb +9 -0
- data/spec/schema_store_spec.rb +253 -0
- data/spec/schema_to_avro_patch_spec.rb +66 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/support/schema_registry_context.rb +190 -0
- metadata +244 -0
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'avro_turf'
|
3
|
+
require 'avro_turf/schema_store'
|
4
|
+
require 'avro_turf/schema_registry'
|
5
|
+
require 'avro_turf/cached_schema_registry'
|
6
|
+
|
7
|
+
class AvroTurf

  # Provides a way to encode and decode messages without having to embed schemas
  # in the encoded data. Confluent's Schema Registry[1] is used to register
  # a schema when encoding a message -- the registry will issue a schema id that
  # will be included in the encoded data alongside the actual message. When
  # decoding the data, the schema id will be used to look up the writer's schema
  # from the registry.
  #
  # Encoded layout: 1 magic byte, a 4-byte big-endian schema id, then the
  # Avro binary-encoded message.
  #
  # 1: https://github.com/confluentinc/schema-registry
  class Messaging
    MAGIC_BYTE = [0].pack("C").freeze

    # Instantiate a new Messaging instance with the given configuration.
    #
    # registry     - A schema registry object that responds to all methods in the
    #                AvroTurf::SchemaRegistry interface.
    # registry_url - The String URL of the schema registry that should be used.
    #                Only consulted when no `registry` is given.
    # schema_store - A schema store object that responds to #find(schema_name, namespace).
    # schemas_path - The String file system path where local schemas are stored.
    #                Only consulted when no `schema_store` is given.
    # namespace    - The String default schema namespace.
    # logger       - The Logger that should be used to log information (optional).
    #                Defaults to a Logger writing to $stderr.
    def initialize(registry: nil, registry_url: nil, schema_store: nil, schemas_path: nil, namespace: nil, logger: nil)
      @logger = logger || Logger.new($stderr)
      @namespace = namespace
      @schema_store = schema_store || SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
      @registry = registry || CachedSchemaRegistry.new(SchemaRegistry.new(registry_url, logger: @logger))
      # Writer's schemas already fetched from the registry, keyed by schema id.
      @schemas_by_id = {}
    end

    # Encodes a message using the specified schema.
    #
    # message   - The message that should be encoded. Must be compatible with
    #             the schema.
    # schema    - The schema object used to encode the data. It must respond to
    #             #fullname (unless `subject` is given) and be accepted by
    #             Avro::IO::DatumWriter. Required.
    # namespace - Accepted for interface compatibility; not used by this method.
    # subject   - The registry subject to register the schema under (optional).
    #             Defaults to the schema's full name.
    #
    # Returns the encoded data as a String.
    # Raises ArgumentError if no schema is given.
    def encode(message, schema: nil, namespace: @namespace, subject: nil)
      # Fail fast with a clear message instead of a NoMethodError on nil below.
      raise ArgumentError, "schema must be provided" if schema.nil?

      # Schemas are registered under the full name of the top level Avro record
      # type, or `subject` if it's provided.
      schema_id = @registry.register(subject || schema.fullname, schema)

      stream = StringIO.new
      writer = Avro::IO::DatumWriter.new(schema)
      encoder = Avro::IO::BinaryEncoder.new(stream)

      # Always start with the magic byte.
      encoder.write(MAGIC_BYTE)

      # The schema id is encoded as a 4-byte big-endian integer.
      encoder.write([schema_id].pack("N"))

      # The actual message comes last.
      writer.write(message, encoder)

      stream.string
    end

    # Decodes data into the original message.
    #
    # data        - A String containing encoded data.
    # schema_name - The String name of the schema that should be used to decode
    #               the data. Must match the schema used when encoding (optional).
    #               When omitted, no reader's schema is passed to the datum reader.
    # namespace   - The namespace of the schema (optional).
    #
    # Returns the decoded message.
    # Raises RuntimeError if the data does not start with the magic byte.
    def decode(data, schema_name: nil, namespace: @namespace)
      readers_schema = schema_name && @schema_store.find(schema_name, namespace)
      stream = StringIO.new(data)
      decoder = Avro::IO::BinaryDecoder.new(stream)

      # The first byte is the magic byte.
      magic_byte = decoder.read(1)

      if magic_byte != MAGIC_BYTE
        raise "Expected data to begin with a magic byte, got `#{magic_byte.inspect}`"
      end

      # The schema id is a 4-byte big-endian integer.
      schema_id = decoder.read(4).unpack("N").first

      # Look the writer's schema up in the local cache first; on a miss, fetch
      # it from the registry, parse it, and remember it for later calls.
      writers_schema = @schemas_by_id.fetch(schema_id) do
        schema_json = @registry.fetch(schema_id)
        @schemas_by_id[schema_id] = Avro::Schema.parse(schema_json)
      end

      reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
      reader.read(decoder)
    end
  end
end
|
102
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'excon'
|
2
|
+
|
3
|
+
# Thin HTTP client for a schema registry. Every request goes through a single
# Excon connection, and every response body is parsed as JSON.
class AvroTurf::SchemaRegistry
  CONTENT_TYPE = "application/vnd.schemaregistry.v1+json".freeze

  # url    - The String base URL of the registry.
  # logger - The Logger used for informational messages (defaults to $stdout).
  def initialize(url, logger: Logger.new($stdout))
    @logger = logger
    @connection = Excon.new(url, headers: { "Content-Type" => CONTENT_TYPE })
  end

  # Fetches the schema stored under the given id.
  #
  # Returns the value of the "schema" key in the registry's response.
  def fetch(id)
    @logger.info "Fetching schema with id #{id}"
    get("/schemas/ids/#{id}").fetch("schema")
  end

  # Registers `schema` (serialized with #to_s) under `subject`.
  #
  # Returns the id issued by the registry.
  def register(subject, schema)
    payload = { schema: schema.to_s }.to_json
    schema_id = post("/subjects/#{subject}/versions", body: payload).fetch("id")

    @logger.info "Registered schema for subject `#{subject}`; id = #{schema_id}"

    schema_id
  end

  # List all subjects
  def subjects
    get('/subjects')
  end

  # List all versions for a subject
  def subject_versions(subject)
    get("/subjects/#{subject}/versions")
  end

  # Get a specific version for a subject
  def subject_version(subject, version = 'latest')
    get("/subjects/#{subject}/versions/#{version}")
  end

  # Check if a schema exists. Returns nil if not found.
  #
  # A 404 is an accepted response here; a body carrying an "error_code" key
  # is treated as "not found".
  def check(subject, schema)
    payload = { schema: schema.to_s }.to_json
    result = post("/subjects/#{subject}", expects: [200, 404], body: payload)
    return nil if result.has_key?("error_code")

    result
  end

  # Check if a schema is compatible with the stored version.
  # Returns true if compatible, false otherwise; returns nil when the
  # registry answers with an error body (e.g. unknown subject or version).
  # http://docs.confluent.io/2.0.0/schema-registry/docs/api.html#compatibility
  def compatible?(subject, schema, version = 'latest')
    payload = { schema: schema.to_s }.to_json
    result = post("/compatibility/subjects/#{subject}/versions/#{version}",
                  expects: [200, 404],
                  body: payload)
    return nil if result.has_key?('error_code')

    result.fetch('is_compatible', false)
  end

  private

  # Issues a GET request for `path`.
  def get(path, **options)
    request(path, method: :get, **options)
  end

  # Issues a POST request for `path`.
  def post(path, **options)
    request(path, method: :post, **options)
  end

  # Performs the request, expecting HTTP 200 unless the caller overrides
  # `expects`, and returns the parsed JSON body.
  def request(path, **options)
    request_options = { expects: 200 }.merge(options)
    http_response = @connection.request(path: path, **request_options)
    JSON.parse(http_response.body)
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Resolves Avro schemas from `.avsc` files under a root directory and caches
# them by full name.
class AvroTurf::SchemaStore
  # path - The root directory that contains the schema definition files.
  #
  # Raises RuntimeError if no path is given.
  def initialize(path: nil)
    raise "Please specify a schema path" unless path

    @path = path
    @schemas = Hash.new
  end

  # Resolves and returns a schema.
  #
  # name      - The String name of the schema to resolve.
  # namespace - The String namespace of the schema (optional).
  #
  # The full name maps to a file path: each dot-separated namespace segment is
  # a directory and the final segment is the `.avsc` file name.
  #
  # Returns an Avro::Schema.
  # Raises AvroTurf::SchemaNotFoundError if the schema file does not exist.
  # Raises AvroTurf::SchemaError if the file defines a differently named type.
  def find(name, namespace = nil)
    fullname = Avro::Name.make_fullname(name, namespace)

    return @schemas[fullname] if @schemas.key?(fullname)

    *namespace_parts, schema_name = fullname.split(".")
    schema_path = File.join(@path, *namespace_parts, schema_name + ".avsc")
    schema_json = JSON.parse(File.read(schema_path))
    # @schemas is handed to the parser as its names table; presumably the
    # parser registers every named type it defines there, which is what makes
    # the cache lookup above work -- confirm against the Avro gem.
    schema = Avro::Schema.real_parse(schema_json, @schemas)

    if schema.respond_to?(:fullname) && schema.fullname != fullname
      raise AvroTurf::SchemaError, "expected schema `#{schema_path}' to define type `#{fullname}'"
    end

    schema
  rescue ::Avro::SchemaParseError => e
    # This is a hack in order to figure out exactly which type was missing. The
    # Avro gem ought to provide this data directly.
    if e.to_s =~ /"([\w\.]+)" is not a schema we know about/
      find($1)

      # Re-resolve the original schema now that the dependency has been resolved.
      @schemas.delete(fullname)
      find(fullname)
    else
      raise
    end
  rescue Errno::ENOENT, Errno::ENAMETOOLONG
    raise AvroTurf::SchemaNotFoundError, "could not find Avro schema at `#{schema_path}'"
  end

  # Loads all schema definition files found (recursively) under the schema path.
  def load_schemas!
    pattern = [@path, "**", "*.avsc"].join("/")

    # Escape the root path so that regex metacharacters in directory names
    # (e.g. `+`, `(`, `[`) are matched literally when stripping the prefix.
    prefix = /^\/?#{Regexp.escape(@path.to_s)}\//

    Dir.glob(pattern) do |schema_path|
      # Remove the path prefix.
      relative_path = schema_path.sub(prefix, "")

      # Replace `/` with `.` and chop off the file extension.
      schema_name = File.basename(relative_path.tr("/", "."), ".avsc")

      # Load and cache the schema.
      find(schema_name)
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
class AvroTurf
  # Patches for the Avro gem that distinguish "field has no default" from
  # "field has a default of nil/null" by using the sentinel `:no_default`.
  module AvroGemPatch
    module RecordSchema
      module ClassMethods
        # Marks every field definition that lacks a 'default' key with the
        # `:no_default` sentinel before delegating to the original
        # implementation. Input hashes are copied, never mutated.
        #
        # Entries that are not hash-like are passed through untouched so that
        # the original implementation can report them, rather than failing
        # here with a NoMethodError on `key?`.
        def make_field_objects(field_data, names, namespace=nil)
          new_field_data = field_data.map do |field|
            hash_like = field.respond_to?(:[]) && field.respond_to?(:key?)

            if hash_like && !field.key?('default')
              # dup (not clone) so a frozen definition can still be annotated.
              field = field.dup
              field['default'] = :no_default
            end

            field
          end

          super(new_field_data, names, namespace)
        end
      end

      # Prepending this module onto a class also prepends ClassMethods onto
      # that class's singleton class, so the class-level method is patched.
      def self.prepended(base)
        class << base
          prepend ClassMethods
        end
      end
    end

    module Field
      # Same as the patched initializer, but `default` falls back to the
      # `:no_default` sentinel when omitted.
      def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil)
        super(type, name, default, order, names, namespace)
      end

      # Serializes the field, emitting 'default' only when one was declared
      # and 'order' only when it is set.
      def to_avro(names=Set.new)
        {'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
          avro['default'] = default unless default == :no_default
          avro['order'] = order if order
        end
      end
    end

    module DatumReader
      # Raises Avro::AvroError when the field has no declared default;
      # otherwise defers to the original implementation.
      def read_default_value(field_schema, default_value)
        if default_value == :no_default
          raise Avro::AvroError, "Missing data for #{field_schema} with no default"
        end

        super
      end
    end
  end
end
|
49
|
+
|
50
|
+
# Install each patch module in front of the Avro class it overrides.
{
  Avro::Schema::RecordSchema => AvroTurf::AvroGemPatch::RecordSchema,
  Avro::Schema::Field        => AvroTurf::AvroGemPatch::Field,
  Avro::IO::DatumReader      => AvroTurf::AvroGemPatch::DatumReader
}.each do |avro_class, patch|
  avro_class.send(:prepend, patch)
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'sinatra/base'
|
2
|
+
|
3
|
+
# In-memory Sinatra stand-in for a schema registry, intended for tests.
# State lives in class-level constants, so it is shared by every instance
# until `clear` is called.
class FakeSchemaRegistryServer < Sinatra::Base
  # subject name => list of schema ids, in registration order. The default
  # block returns a fresh array but does NOT store it, so merely reading an
  # unknown subject never creates it.
  SUBJECTS = Hash.new { Array.new }
  # Registered schema JSON strings; a schema's id is its index in this list.
  SCHEMAS = []
  SUBJECT_NOT_FOUND = { error_code: 40401, message: 'Subject not found' }.to_json.freeze
  VERSION_NOT_FOUND = { error_code: 40402, message: 'Version not found' }.to_json.freeze
  SCHEMA_NOT_FOUND = { error_code: 40403, message: 'Schema not found' }.to_json.freeze

  helpers do
    # Extracts the "schema" value from the JSON request body and checks that
    # it parses as an Avro schema. Returns the schema string itself.
    def parse_schema
      request.body.rewind
      JSON.parse(request.body.read).fetch("schema").tap do |schema|
        Avro::Schema.parse(schema)
      end
    end
  end

  # Register a schema under a subject; responds with the new schema id.
  post "/subjects/:subject/versions" do
    SCHEMAS << parse_schema

    schema_id = SCHEMAS.size - 1
    # Explicit assignment is required because the hash default is not stored.
    SUBJECTS[params[:subject]] = SUBJECTS[params[:subject]] << schema_id
    { id: schema_id }.to_json
  end

  # Fetch a schema by id.
  get "/schemas/ids/:schema_id" do
    schema = SCHEMAS.at(params[:schema_id].to_i)
    halt(404, SCHEMA_NOT_FOUND) unless schema
    { schema: schema }.to_json
  end

  # List all subjects.
  get "/subjects" do
    SUBJECTS.keys.to_json
  end

  # List the (1-based) version numbers of a subject.
  get "/subjects/:subject/versions" do
    schema_ids = SUBJECTS[params[:subject]]
    halt(404, SUBJECT_NOT_FOUND) if schema_ids.empty?
    (1..schema_ids.size).to_a.to_json
  end

  # Fetch a specific version of a subject, or the most recent for 'latest'.
  get "/subjects/:subject/versions/:version" do
    schema_ids = SUBJECTS[params[:subject]]
    halt(404, SUBJECT_NOT_FOUND) if schema_ids.empty?

    schema_id = if params[:version] == 'latest'
                  schema_ids.last
                else
                  version_number = Integer(params[:version])
                  # Versions are 1-based. Reject 0 and negative numbers so
                  # Array#at cannot wrap around to the end of the list.
                  schema_ids.at(version_number - 1) if version_number >= 1
                end
    halt(404, VERSION_NOT_FOUND) unless schema_id

    schema = SCHEMAS.at(schema_id)

    {
      name: params[:subject],
      version: schema_ids.index(schema_id) + 1,
      schema: schema
    }.to_json
  end

  # Check whether a schema is registered under a subject.
  post "/subjects/:subject" do
    schema = parse_schema

    # Note: this does not actually handle the same schema registered under
    # multiple subjects
    schema_id = SCHEMAS.index(schema)

    halt(404, SCHEMA_NOT_FOUND) unless schema_id

    # The schema may be known but registered under a different subject;
    # answer 404 rather than crashing on `nil + 1`.
    version_index = SUBJECTS[params[:subject]].index(schema_id)
    halt(404, SCHEMA_NOT_FOUND) unless version_index

    {
      subject: params[:subject],
      id: schema_id,
      version: version_index + 1,
      schema: schema
    }.to_json
  end

  # Resets all registered subjects and schemas.
  def self.clear
    SUBJECTS.clear
    SCHEMAS.clear
  end
end
|