avro_turf_enchanced 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +2 -0
  4. data/Gemfile +7 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +174 -0
  7. data/Rakefile +2 -0
  8. data/avro_turf.gemspec +30 -0
  9. data/circle.yml +4 -0
  10. data/lib/avro_turf.rb +105 -0
  11. data/lib/avro_turf/cached_schema_registry.rb +26 -0
  12. data/lib/avro_turf/core_ext.rb +10 -0
  13. data/lib/avro_turf/core_ext/date.rb +5 -0
  14. data/lib/avro_turf/core_ext/enumerable.rb +5 -0
  15. data/lib/avro_turf/core_ext/false_class.rb +5 -0
  16. data/lib/avro_turf/core_ext/hash.rb +7 -0
  17. data/lib/avro_turf/core_ext/nil_class.rb +5 -0
  18. data/lib/avro_turf/core_ext/numeric.rb +5 -0
  19. data/lib/avro_turf/core_ext/string.rb +5 -0
  20. data/lib/avro_turf/core_ext/symbol.rb +5 -0
  21. data/lib/avro_turf/core_ext/time.rb +5 -0
  22. data/lib/avro_turf/core_ext/true_class.rb +5 -0
  23. data/lib/avro_turf/messaging.rb +102 -0
  24. data/lib/avro_turf/schema_registry.rb +79 -0
  25. data/lib/avro_turf/schema_store.rb +58 -0
  26. data/lib/avro_turf/schema_to_avro_patch.rb +52 -0
  27. data/lib/avro_turf/test/fake_schema_registry_server.rb +84 -0
  28. data/lib/avro_turf/version.rb +3 -0
  29. data/perf/address.avsc +14 -0
  30. data/perf/encoding_size.rb +26 -0
  31. data/perf/encoding_speed.rb +30 -0
  32. data/perf/person.avsc +14 -0
  33. data/spec/avro_turf_spec.rb +161 -0
  34. data/spec/cached_schema_registry_spec.rb +41 -0
  35. data/spec/core_ext/date_spec.rb +6 -0
  36. data/spec/core_ext/enumerable_spec.rb +12 -0
  37. data/spec/core_ext/false_class_spec.rb +5 -0
  38. data/spec/core_ext/hash_spec.rb +8 -0
  39. data/spec/core_ext/nil_class_spec.rb +5 -0
  40. data/spec/core_ext/numeric_spec.rb +6 -0
  41. data/spec/core_ext/string_spec.rb +5 -0
  42. data/spec/core_ext/symbol_spec.rb +5 -0
  43. data/spec/core_ext/time_spec.rb +6 -0
  44. data/spec/core_ext/true_class_spec.rb +5 -0
  45. data/spec/messaging_spec.rb +112 -0
  46. data/spec/schema_registry_spec.rb +9 -0
  47. data/spec/schema_store_spec.rb +253 -0
  48. data/spec/schema_to_avro_patch_spec.rb +66 -0
  49. data/spec/spec_helper.rb +20 -0
  50. data/spec/support/schema_registry_context.rb +190 -0
  51. metadata +244 -0
@@ -0,0 +1,7 @@
1
# Avro conversion support for hashes.
class Hash
  # Builds a new Hash in which every key and every value has been replaced by
  # the result of calling #as_avro on it. The receiver itself is not modified.
  #
  # Returns a new Hash.
  def as_avro
    each_with_object({}) do |(key, value), converted|
      converted[key.as_avro] = value.as_avro
    end
  end
end
@@ -0,0 +1,5 @@
1
# Avro conversion support for nil.
class NilClass
  # nil needs no conversion.
  #
  # Returns the receiver (nil).
  def as_avro
    self
  end
end
@@ -0,0 +1,5 @@
1
# Avro conversion support for numbers.
class Numeric
  # Numbers need no conversion.
  #
  # Returns the receiver unchanged.
  def as_avro
    self
  end
end
@@ -0,0 +1,5 @@
1
# Avro conversion support for strings.
class String
  # Strings need no conversion.
  #
  # Returns the receiver itself (not a copy).
  def as_avro
    self
  end
end
@@ -0,0 +1,5 @@
1
# Avro conversion support for symbols.
class Symbol
  # Symbols are converted to their String form.
  #
  # Returns a String.
  def as_avro
    to_s
  end
end
@@ -0,0 +1,5 @@
1
# Avro conversion support for timestamps.
class Time
  # Timestamps are converted to their ISO 8601 textual representation.
  #
  # Returns a String.
  def as_avro
    iso8601
  end
end
@@ -0,0 +1,5 @@
1
# Avro conversion support for true.
class TrueClass
  # true needs no conversion.
  #
  # Returns the receiver (true).
  def as_avro
    self
  end
end
@@ -0,0 +1,102 @@
1
+ require 'logger'
2
+ require 'avro_turf'
3
+ require 'avro_turf/schema_store'
4
+ require 'avro_turf/schema_registry'
5
+ require 'avro_turf/cached_schema_registry'
6
+
7
class AvroTurf

  # Provides a way to encode and decode messages without having to embed schemas
  # in the encoded data. Confluent's Schema Registry[1] is used to register
  # a schema when encoding a message -- the registry will issue a schema id that
  # will be included in the encoded data alongside the actual message. When
  # decoding the data, the schema id will be used to look up the writer's schema
  # from the registry.
  #
  # Encoded layout: 1 magic byte, a 4-byte big-endian schema id, then the
  # Avro binary payload.
  #
  # 1: https://github.com/confluentinc/schema-registry
  class Messaging
    MAGIC_BYTE = [0].pack("C").freeze

    # Instantiate a new Messaging instance with the given configuration.
    #
    # registry     - A schema registry object that responds to all methods in the
    #                AvroTurf::SchemaRegistry interface.
    # registry_url - The String URL of the schema registry that should be used.
    #                Only consulted when no `registry` is given.
    # schema_store - A schema store object that responds to #find(schema_name, namespace).
    # schemas_path - The String file system path where local schemas are stored.
    #                Only consulted when no `schema_store` is given.
    # namespace    - The String default schema namespace.
    # logger       - The Logger that should be used to log information (optional).
    def initialize(registry: nil, registry_url: nil, schema_store: nil, schemas_path: nil, namespace: nil, logger: nil)
      @logger = logger || Logger.new($stderr)
      @namespace = namespace
      @schema_store = schema_store || SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
      @registry = registry || CachedSchemaRegistry.new(SchemaRegistry.new(registry_url, logger: @logger))

      # Writer schemas already fetched from the registry, keyed by schema id.
      @schemas_by_id = {}
    end

    # Encodes a message using the specified schema.
    #
    # message   - The message that should be encoded. Must be compatible with
    #             the schema.
    # schema    - The schema object used to encode the data (required). It is
    #             handed to Avro::IO::DatumWriter and registered with the
    #             registry; it must respond to #fullname unless `subject` is
    #             given.
    # namespace - Accepted for interface compatibility; not used by this method.
    # subject   - The registry subject to register the schema under (optional).
    #             Defaults to the schema's full name.
    #
    # Returns the encoded data as a String.
    # Raises ArgumentError if no schema is given.
    def encode(message, schema: nil, namespace: @namespace, subject: nil)
      # Fail early with a clear message instead of a NoMethodError on nil below.
      raise ArgumentError, "a schema is required to encode a message" if schema.nil?

      # Schemas are registered under the full name of the top level Avro record
      # type, or `subject` if it's provided.
      schema_id = @registry.register(subject || schema.fullname, schema)

      stream = StringIO.new
      writer = Avro::IO::DatumWriter.new(schema)
      encoder = Avro::IO::BinaryEncoder.new(stream)

      # Always start with the magic byte.
      encoder.write(MAGIC_BYTE)

      # The schema id is encoded as a 4-byte big-endian integer.
      encoder.write([schema_id].pack("N"))

      # The actual message comes last.
      writer.write(message, encoder)

      stream.string
    end

    # Decodes data into the original message.
    #
    # data        - A String containing encoded data.
    # schema_name - The String name of the schema that should be used to decode
    #               the data. Must match the schema used when encoding (optional).
    # namespace   - The namespace of the schema (optional).
    #
    # Returns the decoded message.
    # Raises RuntimeError if the data does not start with the magic byte or is
    # too short to contain a complete schema id.
    def decode(data, schema_name: nil, namespace: @namespace)
      readers_schema = schema_name && @schema_store.find(schema_name, namespace)
      stream = StringIO.new(data)

      # The header is plain bytes, so it is read straight from the stream and
      # validated before any Avro decoding is attempted.
      magic_byte = stream.read(1)

      if magic_byte != MAGIC_BYTE
        raise "Expected data to begin with a magic byte, got `#{magic_byte.inspect}`"
      end

      # The schema id is a 4-byte big-endian integer.
      schema_id_bytes = stream.read(4)

      if schema_id_bytes.nil? || schema_id_bytes.bytesize < 4
        raise "Expected a 4-byte schema id after the magic byte, got `#{schema_id_bytes.inspect}`"
      end

      schema_id = schema_id_bytes.unpack("N").first

      # Only hit the registry the first time a given schema id is seen.
      writers_schema = @schemas_by_id.fetch(schema_id) do
        schema_json = @registry.fetch(schema_id)
        @schemas_by_id[schema_id] = Avro::Schema.parse(schema_json)
      end

      # The stream is now positioned at the start of the Avro payload.
      decoder = Avro::IO::BinaryDecoder.new(stream)
      reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
      reader.read(decoder)
    end
  end
end
102
+
@@ -0,0 +1,79 @@
1
+ require 'excon'
2
+
3
require 'erb'
require 'json'
require 'logger'

class AvroTurf
  # HTTP client for a schema registry. Every response body is parsed as JSON
  # and returned as plain Ruby data.
  class SchemaRegistry
    CONTENT_TYPE = "application/vnd.schemaregistry.v1+json".freeze

    # url    - The String base URL of the schema registry.
    # logger - The Logger used for informational messages (defaults to $stdout).
    def initialize(url, logger: Logger.new($stdout))
      @logger = logger
      @connection = Excon.new(url, headers: {
        "Content-Type" => CONTENT_TYPE,
      })
    end

    # Fetches the schema registered under the given id.
    #
    # Returns the value of the "schema" key of the response.
    def fetch(id)
      @logger.info "Fetching schema with id #{id}"
      get("/schemas/ids/#{escape_segment(id)}").fetch("schema")
    end

    # Registers a schema under a subject.
    #
    # subject - The subject name to register under.
    # schema  - The schema; it is sent in its #to_s form.
    #
    # Returns the value of the "id" key of the response.
    def register(subject, schema)
      data = post("/subjects/#{escape_segment(subject)}/versions",
        body: { schema: schema.to_s }.to_json)

      id = data.fetch("id")

      @logger.info "Registered schema for subject `#{subject}`; id = #{id}"

      id
    end

    # List all subjects
    def subjects
      get('/subjects')
    end

    # List all versions for a subject
    def subject_versions(subject)
      get("/subjects/#{escape_segment(subject)}/versions")
    end

    # Get a specific version for a subject
    def subject_version(subject, version = 'latest')
      get("/subjects/#{escape_segment(subject)}/versions/#{escape_segment(version)}")
    end

    # Check if a schema exists. Returns the parsed response, or nil when the
    # response carries an "error_code" (e.g. on a 404).
    def check(subject, schema)
      data = post("/subjects/#{escape_segment(subject)}",
        expects: [200, 404],
        body: { schema: schema.to_s }.to_json)
      data unless data.key?("error_code")
    end

    # Check if a schema is compatible with the stored version.
    # Returns the "is_compatible" value of the response (false when the key is
    # missing), or nil when the response carries an "error_code" (e.g. the
    # subject or version was not found).
    # http://docs.confluent.io/2.0.0/schema-registry/docs/api.html#compatibility
    def compatible?(subject, schema, version = 'latest')
      data = post("/compatibility/subjects/#{escape_segment(subject)}/versions/#{escape_segment(version)}",
        expects: [200, 404],
        body: { schema: schema.to_s }.to_json)
      data.fetch('is_compatible', false) unless data.key?('error_code')
    end

    private

    def get(path, **options)
      request(path, method: :get, **options)
    end

    def post(path, **options)
      request(path, method: :post, **options)
    end

    # Performs the request and parses the JSON response body. Unless the
    # caller overrides `expects`, only a 200 status is accepted.
    def request(path, **options)
      options = { expects: 200 }.merge!(options)
      response = @connection.request(path: path, **options)
      JSON.parse(response.body)
    end

    # Percent-encodes a value for use as a single URL path segment, so that
    # characters such as `/`, `?` or spaces cannot change which endpoint is hit.
    def escape_segment(value)
      ERB::Util.url_encode(value.to_s)
    end
  end
end
@@ -0,0 +1,58 @@
1
class AvroTurf
  # Resolves Avro schemas from `.avsc` files below a root directory and caches
  # them by full name. The namespace of a schema maps to sub-directories, e.g.
  # `com.example.person` is read from `<path>/com/example/person.avsc`.
  class SchemaStore
    # path - The root directory containing the schema files (required).
    #
    # Raises RuntimeError if no path is given.
    def initialize(path: nil)
      raise "Please specify a schema path" unless path

      @path = path
      @schemas = Hash.new
    end

    # Resolves and returns a schema.
    #
    # name      - The String name of the schema to resolve.
    # namespace - The namespace used to build the full name (optional).
    #
    # Returns an Avro::Schema.
    # Raises AvroTurf::SchemaError if the file defines a differently named type.
    # Raises AvroTurf::SchemaNotFoundError if the schema file cannot be read.
    def find(name, namespace = nil)
      fullname = Avro::Name.make_fullname(name, namespace)

      return @schemas[fullname] if @schemas.key?(fullname)

      *namespace, schema_name = fullname.split(".")
      schema_path = File.join(@path, *namespace, schema_name + ".avsc")
      schema_json = JSON.parse(File.read(schema_path))
      schema = Avro::Schema.real_parse(schema_json, @schemas)

      if schema.respond_to?(:fullname) && schema.fullname != fullname
        raise AvroTurf::SchemaError, "expected schema `#{schema_path}' to define type `#{fullname}'"
      end

      schema
    rescue ::Avro::SchemaParseError => e
      # This is a hack in order to figure out exactly which type was missing. The
      # Avro gem ought to provide this data directly.
      if e.to_s =~ /"([\w\.]+)" is not a schema we know about/
        find($1)

        # Re-resolve the original schema now that the dependency has been resolved.
        @schemas.delete(fullname)
        find(fullname)
      else
        raise
      end
    rescue Errno::ENOENT, Errno::ENAMETOOLONG
      raise AvroTurf::SchemaNotFoundError, "could not find Avro schema at `#{schema_path}'"
    end

    # Loads all schema definition files found below the store's path.
    def load_schemas!
      pattern = [@path, "**", "*.avsc"].join("/")

      # The path is escaped so that regex metacharacters in directory names
      # (`+`, `(`, ...) are matched literally; `\A` anchors to the string start.
      path_prefix = /\A\/?#{Regexp.escape(@path.to_s)}\//

      Dir.glob(pattern) do |schema_path|
        # Remove the path prefix.
        relative_path = schema_path.sub(path_prefix, "")

        # Replace `/` with `.` and chop off the file extension.
        schema_name = File.basename(relative_path.tr("/", "."), ".avsc")

        # Load and cache the schema.
        find(schema_name)
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
class AvroTurf
  # Patches for the Avro gem that use the `:no_default` sentinel to mark
  # fields without a `default` entry: it is filled in when field objects are
  # built, left out again when a field is serialized, and rejected when a
  # default value has to be read.
  module AvroGemPatch
    module RecordSchema
      module ClassMethods
        # Gives every hash-like field definition without a 'default' key the
        # `:no_default` sentinel, then delegates to the original method.
        # The caller's field definitions are never modified.
        def make_field_objects(field_data, names, namespace=nil)
          new_field_data = field_data.map do |field|
            # Guard on the method that is actually called: Strings and Arrays
            # respond to #[] but not to #key?.
            if field.respond_to?(:key?) && !field.key?('default')
              # #dup (unlike #clone) yields an unfrozen copy, so frozen field
              # definitions can be handled too.
              field = field.dup
              field['default'] = :no_default
            end
            field
          end
          super(new_field_data, names, namespace)
        end
      end

      # Hooks the class-level patch into the singleton class of whatever
      # class this module is prepended to.
      def self.prepended(base)
        class << base
          prepend ClassMethods
        end
      end
    end

    module Field
      # Same as the original constructor except that `default` falls back to
      # the `:no_default` sentinel.
      def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil)
        super(type, name, default, order, names, namespace)
      end

      # Serializes the field, leaving out 'default' when it is the sentinel
      # and 'order' when it is not set.
      def to_avro(names=Set.new)
        {'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
          avro['default'] = default unless default == :no_default
          avro['order'] = order if order
        end
      end
    end

    module DatumReader
      # Raises Avro::AvroError when the default value is the `:no_default`
      # sentinel; otherwise defers to the original implementation.
      def read_default_value(field_schema, default_value)
        if default_value == :no_default
          raise Avro::AvroError, "Missing data for #{field_schema} with no default"
        end

        super
      end
    end
  end
end
49
+
50
# Prepend each AvroTurf::AvroGemPatch module to the Avro class it patches, so
# the patched methods run first and reach the originals through `super`.
{
  Avro::Schema::RecordSchema => AvroTurf::AvroGemPatch::RecordSchema,
  Avro::Schema::Field => AvroTurf::AvroGemPatch::Field,
  Avro::IO::DatumReader => AvroTurf::AvroGemPatch::DatumReader
}.each do |avro_class, patch|
  avro_class.send(:prepend, patch)
end
@@ -0,0 +1,84 @@
1
+ require 'sinatra/base'
2
+
3
# In-memory stand-in for a schema registry's HTTP API. Registered schemas
# live in process-wide constants; call .clear to reset them.
class FakeSchemaRegistryServer < Sinatra::Base
  # Subject name => list of schema ids. The default block returns a fresh
  # Array without storing it, so lookups never create subjects.
  SUBJECTS = Hash.new { Array.new }
  # Registered schema JSON strings; a schema's id is its index in this list.
  SCHEMAS = []
  SUBJECT_NOT_FOUND = { error_code: 40401, message: 'Subject not found' }.to_json.freeze
  VERSION_NOT_FOUND = { error_code: 40402, message: 'Version not found' }.to_json.freeze
  SCHEMA_NOT_FOUND = { error_code: 40403, message: 'Schema not found' }.to_json.freeze

  helpers do
    # Reads the "schema" entry from the JSON request body and returns it after
    # checking that Avro can parse it.
    def parse_schema
      request.body.rewind
      JSON.parse(request.body.read).fetch("schema").tap do |schema|
        Avro::Schema.parse(schema)
      end
    end

    # Converts a 1-based version path parameter into a 0-based list index.
    # Returns nil when the value is not an integer or is smaller than 1, so
    # that bad input cannot wrap around to the end of the list.
    def version_to_index(raw_version)
      version = Integer(raw_version)
      version - 1 if version >= 1
    rescue ArgumentError, TypeError
      nil
    end
  end

  # Registers a schema under a subject and responds with its id.
  post "/subjects/:subject/versions" do
    SCHEMAS << parse_schema

    schema_id = SCHEMAS.size - 1
    # Explicit assignment is required because the Hash default is not stored.
    SUBJECTS[params[:subject]] = SUBJECTS[params[:subject]] << schema_id
    { id: schema_id }.to_json
  end

  # Looks a schema up by id.
  get "/schemas/ids/:schema_id" do
    schema = SCHEMAS.at(params[:schema_id].to_i)
    halt(404, SCHEMA_NOT_FOUND) unless schema
    { schema: schema }.to_json
  end

  # Lists all subject names.
  get "/subjects" do
    SUBJECTS.keys.to_json
  end

  # Lists the (1-based) version numbers of a subject.
  get "/subjects/:subject/versions" do
    schema_ids = SUBJECTS[params[:subject]]
    halt(404, SUBJECT_NOT_FOUND) if schema_ids.empty?
    (1..schema_ids.size).to_a.to_json
  end

  # Returns one version of a subject; `latest` selects the newest one.
  get "/subjects/:subject/versions/:version" do
    schema_ids = SUBJECTS[params[:subject]]
    halt(404, SUBJECT_NOT_FOUND) if schema_ids.empty?

    schema_id = if params[:version] == 'latest'
                  schema_ids.last
                else
                  index = version_to_index(params[:version])
                  index && schema_ids.at(index)
                end
    halt(404, VERSION_NOT_FOUND) unless schema_id

    schema = SCHEMAS.at(schema_id)

    {
      name: params[:subject],
      version: schema_ids.index(schema_id) + 1,
      schema: schema
    }.to_json
  end

  # Checks whether a schema has been registered under a subject.
  post "/subjects/:subject" do
    schema = parse_schema

    # Note: this does not actually handle the same schema registered under
    # multiple subjects
    schema_id = SCHEMAS.index(schema)

    halt(404, SCHEMA_NOT_FOUND) unless schema_id

    # The schema may be known but not under this subject; answer 404 rather
    # than failing on a nil position.
    position = SUBJECTS[params[:subject]].index(schema_id)

    halt(404, SCHEMA_NOT_FOUND) unless position

    {
      subject: params[:subject],
      id: schema_id,
      version: position + 1,
      schema: schema
    }.to_json
  end

  # Forgets all registered subjects and schemas.
  def self.clear
    SUBJECTS.clear
    SCHEMAS.clear
  end
end
@@ -0,0 +1,3 @@
1
class AvroTurf
  # Version string of the gem.
  VERSION = '0.7.3'
end
data/perf/address.avsc ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "address",
3
+ "type": "record",
4
+ "fields": [
5
+ {
6
+ "name": "street",
7
+ "type": "string"
8
+ },
9
+ {
10
+ "name": "city",
11
+ "type": "string"
12
+ }
13
+ ]
14
+ }