avro_turf 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d083d0ed365ddbd16e9a5e35acf698d50a7bd7d5
4
- data.tar.gz: a297b67a288046a64932ad1e388cd39e4e7a106d
3
+ metadata.gz: bdd8c25f6656ec55bcb0054fa42e4a85d5018fa6
4
+ data.tar.gz: af4d189168531a28fc31793f8f3c65146c70e976
5
5
  SHA512:
6
- metadata.gz: de5d8c415fcf16b731aa9cd4b69c5e8fc4e2243b092744a637a68a36494083330b861731f7567a4677ba0d149c60a8da6cbce38e40ee32771a84cdb9c687e843
7
- data.tar.gz: 0e14a355dc76805b335d2fda8871671714a3843abe60886ef9d25ec5a4bad3270127d46386c2713e968a1eba47131753389b3cbb55531e8cafd0535ac14b2f39
6
+ metadata.gz: 039e85f46a0d25766bc24dabe3984e4f1fc6b4c535aaacf53a87d8db8ee308693c5c210eb7c550bcf752de2ab3048cbed06a4bb003e23123b0ea8dfb37c0e648
7
+ data.tar.gz: d40406ac87ac25fd476b82c14a8f732daf9067d26176aa2138d234264a8bb8c543ed3d55623161c4adb9320311296485297bb93a96459ac240739ceaa659df3f
data/README.md CHANGED
@@ -91,3 +91,28 @@ In the example above, the `person` schema references the `address` schema, even
91
91
  There's no reason to copy-paste the `person` schema into the `person_list` schema, as you can reference it directly.
92
92
 
93
93
  This feature helps avoid subtle errors when the same type is represented using slightly different schemas.
94
+
95
+
96
+ ### Using a Schema Registry
97
+
98
+ By default, AvroTurf will encode data in the Avro data file format. This means that the schema used to encode the data is prepended to the output. If you want to decrease the size of the output, e.g. when storing data in a log such as Apache Kafka or in a database, you can use the `AvroTurf::Messaging` API. This top-level API requires the use of [Schema Registry](https://github.com/confluentinc/schema-registry), a service which allows registering and fetching Avro schemas.
99
+
100
+ The Messaging API will automatically register schemas used for encoding data, and will fetch the corresponding schema when decoding. Instead of including the full schema in the output, only a schema id generated by the registry is included. Registering the same schema twice is idempotent, so no coordination is needed.
101
+
102
+ **NOTE:** The Messaging format is _not_ compatible with the Avro data file API.
103
+
104
+ Using the Messaging API is simple once you have set up a Schema Registry service:
105
+
106
+ ```ruby
107
+ # You need to pass the URL of your Schema Registry.
108
+ avro = AvroTurf::Messaging.new(registry_url: "http://my-registry:8081/")
109
+
110
+ # The API for encoding and decoding data is similar to the default one. Encoding
111
+ # data has the side effect of registering the schema. This only happens the first
112
+ # time a schema is used.
113
+ data = avro.encode({ "title" => "hello, world" }, schema_name: "greeting")
114
+
115
+ # When decoding, the schema will be fetched from the registry and cached. Subsequent
116
+ # lookups of the same schema id will be served from the cache.
117
+ avro.decode(data) #=> { "title" => "hello, world" }
118
+ ```
data/avro_turf.gemspec CHANGED
@@ -18,9 +18,12 @@ Gem::Specification.new do |spec|
18
18
  spec.require_paths = ["lib"]
19
19
 
20
20
  spec.add_dependency "avro", "~> 1.7.7"
21
+ spec.add_dependency "excon", "~> 0.45.4"
21
22
 
22
23
  spec.add_development_dependency "bundler", "~> 1.7"
23
24
  spec.add_development_dependency "rake", "~> 10.0"
24
25
  spec.add_development_dependency "rspec", "~> 3.2.0"
25
- spec.add_development_dependency "fakefs"
26
+ spec.add_development_dependency "fakefs", "~> 0.6.7"
27
+ spec.add_development_dependency "webmock"
28
+ spec.add_development_dependency "sinatra"
26
29
  end
@@ -0,0 +1,16 @@
1
class AvroTurf
  # Caches registrations and lookups to the schema registry in memory.
  #
  # Wraps another registry object and memoizes the results of its `fetch` and
  # `register` calls in per-instance hashes. Because memoization uses `||=`,
  # a nil or false result from the upstream is not cached and will be
  # requested again on the next call.
  class CachedSchemaRegistry
    # upstream - The registry to delegate to on a cache miss. Must respond to
    #            `fetch(id)` and `register(subject, schema)`.
    def initialize(upstream)
      @upstream = upstream
      @schemas_by_id = {}
      @ids_by_schema = {}
    end

    # Returns the schema for the given id, asking the upstream registry only
    # when the id has not been seen before.
    def fetch(id)
      @schemas_by_id[id] ||= @upstream.fetch(id)
    end

    # Registers the schema under the subject and returns the id issued by the
    # upstream registry, reusing the cached id for a known subject/schema pair.
    def register(subject, schema)
      # A composite key keeps the subject and the schema text apart. Plain
      # concatenation would treat ("ab", "c") and ("a", "bc") as the same
      # entry.
      cache_key = [subject, schema.to_s].freeze
      @ids_by_schema[cache_key] ||= @upstream.register(subject, schema)
    end
  end
end
@@ -0,0 +1,95 @@
1
+ require 'logger'
2
+ require 'avro_turf'
3
+ require 'avro_turf/schema_store'
4
+ require 'avro_turf/schema_registry'
5
+ require 'avro_turf/cached_schema_registry'
6
+
7
class AvroTurf

  # Provides a way to encode and decode messages without having to embed schemas
  # in the encoded data. Confluent's Schema Registry[1] is used to register
  # a schema when encoding a message -- the registry will issue a schema id that
  # will be included in the encoded data alongside the actual message. When
  # decoding the data, the schema id will be used to look up the writer's schema
  # from the registry.
  #
  # The encoded layout is: one magic byte, a 4-byte big-endian schema id, then
  # the Avro-encoded message.
  #
  # 1: https://github.com/confluentinc/schema-registry
  class Messaging
    MAGIC_BYTE = [0].pack("C").freeze

    # Number of bytes occupied by the schema id that follows the magic byte.
    SCHEMA_ID_SIZE = 4

    # Instantiate a new Messaging instance with the given configuration.
    #
    # registry_url - The String URL of the schema registry that should be used.
    # schemas_path - The String file system path where local schemas are stored.
    # namespace    - The String default schema namespace.
    # logger       - The Logger that should be used to log information (optional).
    def initialize(registry_url: nil, schemas_path: nil, namespace: nil, logger: nil)
      @logger = logger || Logger.new($stderr)
      @namespace = namespace
      @schema_store = SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
      @registry = CachedSchemaRegistry.new(SchemaRegistry.new(registry_url, logger: @logger))
    end

    # Encodes a message using the specified schema.
    #
    # message     - The message that should be encoded. Must be compatible with
    #               the schema.
    # schema_name - The String name of the schema that should be used to encode
    #               the data.
    # namespace   - The namespace of the schema (optional).
    #
    # Returns the encoded data as a String.
    def encode(message, schema_name: nil, namespace: @namespace)
      schema = @schema_store.find(schema_name, namespace)

      # Schemas are registered under the full name of the top level Avro record
      # type.
      schema_id = @registry.register(schema.fullname, schema)

      stream = StringIO.new
      writer = Avro::IO::DatumWriter.new(schema)
      encoder = Avro::IO::BinaryEncoder.new(stream)

      # Always start with the magic byte.
      encoder.write(MAGIC_BYTE)

      # The schema id is encoded as a 4-byte big-endian integer.
      encoder.write([schema_id].pack("N"))

      # The actual message comes last.
      writer.write(message, encoder)

      stream.string
    end

    # Decodes data into the original message.
    #
    # data        - A String containing encoded data.
    # schema_name - The String name of the schema that should be used to decode
    #               the data. Must match the schema used when encoding (optional).
    # namespace   - The namespace of the schema (optional).
    #
    # Returns the decoded message.
    # Raises RuntimeError if the data does not start with the magic byte or is
    # too short to contain a complete schema id.
    def decode(data, schema_name: nil, namespace: @namespace)
      readers_schema = schema_name && @schema_store.find(schema_name, namespace)
      stream = StringIO.new(data)

      # The header is consumed straight from the stream, so malformed input is
      # rejected before the registry is contacted or any Avro object is built.
      schema_id = read_schema_id(stream)

      writers_schema_json = @registry.fetch(schema_id)
      writers_schema = Avro::Schema.parse(writers_schema_json)

      # The decoder is created after the header has been read, so it starts at
      # the first byte of the Avro payload.
      decoder = Avro::IO::BinaryDecoder.new(stream)
      reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
      reader.read(decoder)
    end

    private

    # Reads and validates the message header from the stream.
    #
    # stream - An IO-like object positioned at the start of the encoded data.
    #
    # Returns the Integer schema id.
    # Raises RuntimeError if the magic byte is wrong or the id is truncated.
    def read_schema_id(stream)
      # The first byte is MAGIC!!!
      magic_byte = stream.read(1)

      if magic_byte != MAGIC_BYTE
        raise "Expected data to begin with a magic byte, got `#{magic_byte.inspect}`"
      end

      # The schema id is a 4-byte big-endian integer. A short read means the
      # data was truncated; unpacking it would give nil or raise NoMethodError.
      id_bytes = stream.read(SCHEMA_ID_SIZE)

      if id_bytes.nil? || id_bytes.bytesize != SCHEMA_ID_SIZE
        raise "Expected a 4-byte schema id after the magic byte, got `#{id_bytes.inspect}`"
      end

      id_bytes.unpack("N").first
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'excon'
2
+
3
class AvroTurf
  # HTTP client for the schema registry endpoints used by AvroTurf: fetching a
  # schema by id and registering a schema under a subject. Responses are
  # parsed as JSON.
  class SchemaRegistry
    CONTENT_TYPE = "application/vnd.schemaregistry.v1+json".freeze

    # url    - The String base URL of the schema registry.
    # logger - The Logger used for informational messages (defaults to a
    #          logger writing to $stdout).
    def initialize(url, logger: Logger.new($stdout))
      @logger = logger
      @connection = Excon.new(url, headers: {
        "Content-Type" => CONTENT_TYPE,
      })
    end

    # Fetches the schema registered under the given id.
    #
    # Returns the value of the "schema" key of the response.
    # Raises KeyError if the response has no "schema" key.
    def fetch(id)
      @logger.info "Fetching schema with id #{id}"
      data = get("/schemas/ids/#{id}")
      data.fetch("schema")
    end

    # Registers a schema under the given subject.
    #
    # subject - The String subject name; interpolated into the request path
    #           as-is.
    # schema  - The schema to register. Its `to_s` is sent as the schema text.
    #
    # Returns the id found under the "id" key of the response.
    # Raises KeyError if the response has no "id" key.
    def register(subject, schema)
      # Send the schema's string form explicitly instead of relying on the
      # object's own `to_json`, which is not guaranteed to produce a JSON
      # string of the schema text.
      data = post("/subjects/#{subject}/versions", body: {
        schema: schema.to_s
      }.to_json)

      id = data.fetch("id")

      @logger.info "Registered schema for subject `#{subject}`; id = #{id}"

      id
    end

    private

    def get(path, **options)
      request(path, method: :get, **options)
    end

    def post(path, **options)
      request(path, method: :post, **options)
    end

    # Performs the request on the shared connection, asking it to expect a 200
    # status, and parses the response body as JSON.
    def request(path, **options)
      response = @connection.request(path: path, expects: 200, **options)
      JSON.parse(response.body)
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  class AvroTurf
2
- VERSION = "0.5.0"
2
+ VERSION = "0.6.0"
3
3
  end
@@ -0,0 +1,25 @@
1
+ require 'sinatra/base'
2
+
3
# In-memory stand-in for the schema registry HTTP API, for use in specs.
#
# Schemas are kept in a process-wide array and a schema's id is its index in
# that array. Call `FakeSchemaRegistryServer.clear` between examples to reset
# the state.
class FakeSchemaRegistryServer < Sinatra::Base
  SCHEMAS = []

  # Registers the posted schema and responds with its id. The subject from the
  # path is not used: ids are shared across all subjects.
  post "/subjects/:subject/versions" do
    request.body.rewind
    schema = JSON.parse(request.body.read).fetch("schema")

    # Registration is idempotent: posting a schema that is already stored
    # returns its existing id instead of appending a duplicate. Schemas are
    # compared as received, so only an identical value is a match.
    schema_id = SCHEMAS.index(schema)

    if schema_id.nil?
      SCHEMAS << schema
      schema_id = SCHEMAS.size - 1
    end

    { id: schema_id }.to_json
  end

  # Responds with the schema stored under the given id; the "schema" value is
  # null when nothing is stored at that index.
  get "/schemas/ids/:schema_id" do
    schema = SCHEMAS.at(params[:schema_id].to_i)

    { schema: schema }.to_json
  end

  # Removes every registered schema.
  def self.clear
    SCHEMAS.clear
  end
end
@@ -0,0 +1,52 @@
1
+ require 'webmock/rspec'
2
+ require 'avro_turf/messaging'
3
+ require_relative 'fake_schema_registry_server'
4
+
5
# Round-trip specs for AvroTurf::Messaging, run against the fake schema
# registry server instead of a real HTTP service.
describe AvroTurf::Messaging do
  let(:registry_url) { "http://registry.example.com" }
  let(:logger) { Logger.new(StringIO.new) }
  let(:message) { { "full_name" => "John Doe" } }

  let(:avro) do
    AvroTurf::Messaging.new(
      registry_url: registry_url,
      schemas_path: "spec/schemas",
      logger: logger
    )
  end

  before do
    # Directory that holds the schema definitions used by the examples.
    FileUtils.mkdir_p("spec/schemas")

    # Route every request for the registry URL to the in-process fake and
    # start each example with an empty registry.
    stub_request(:any, /^#{registry_url}/).to_rack(FakeSchemaRegistryServer)
    FakeSchemaRegistryServer.clear

    define_schema "person.avsc", <<-AVSC
      {
        "name": "person",
        "type": "record",
        "fields": [
          {
            "type": "string",
            "name": "full_name"
          }
        ]
      }
    AVSC
  end

  it "encodes and decodes messages" do
    encoded = avro.encode(message, schema_name: "person")

    expect(avro.decode(encoded)).to eq message
  end

  it "allows specifying a reader's schema" do
    encoded = avro.encode(message, schema_name: "person")

    expect(avro.decode(encoded, schema_name: "person")).to eq message
  end
end
@@ -0,0 +1,32 @@
1
+ require 'webmock/rspec'
2
+ require 'avro_turf/schema_registry'
3
+ require_relative 'fake_schema_registry_server'
4
+
5
# Spec for the registry HTTP client, run against the fake schema registry
# server instead of a real HTTP service.
describe AvroTurf::SchemaRegistry do
  let(:registry_url) { "http://registry.example.com" }

  before do
    # Route every request for the registry URL to the in-process fake and
    # start each example with an empty registry.
    stub_request(:any, /^#{registry_url}/).to_rack(FakeSchemaRegistryServer)
    FakeSchemaRegistryServer.clear
  end

  it "allows registering a schema" do
    quiet_logger = Logger.new(StringIO.new)
    registry = described_class.new(registry_url, logger: quiet_logger)

    person_schema = <<-JSON
      {
        "type": "record",
        "name": "person",
        "fields": [
          { "name": "name", "type": "string" }
        ]
      }
    JSON

    schema_id = registry.register("some-subject", person_schema)
    round_tripped = registry.fetch(schema_id)

    expect(JSON.parse(round_tripped)).to eq JSON.parse(person_schema)
  end
end
data/spec/spec_helper.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'bundler/setup'
2
+ require 'logger'
2
3
  require 'fakefs/spec_helpers'
3
4
  require 'avro_turf'
4
5
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro_turf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.7.7
27
+ - !ruby/object:Gem::Dependency
28
+ name: excon
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.45.4
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.45.4
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -68,6 +82,34 @@ dependencies:
68
82
  version: 3.2.0
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: fakefs
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.6.7
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.6.7
97
+ - !ruby/object:Gem::Dependency
98
+ name: webmock
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: sinatra
71
113
  requirement: !ruby/object:Gem::Requirement
72
114
  requirements:
73
115
  - - ">="
@@ -96,6 +138,7 @@ files:
96
138
  - avro_turf.gemspec
97
139
  - circle.yml
98
140
  - lib/avro_turf.rb
141
+ - lib/avro_turf/cached_schema_registry.rb
99
142
  - lib/avro_turf/core_ext.rb
100
143
  - lib/avro_turf/core_ext/date.rb
101
144
  - lib/avro_turf/core_ext/enumerable.rb
@@ -107,6 +150,8 @@ files:
107
150
  - lib/avro_turf/core_ext/symbol.rb
108
151
  - lib/avro_turf/core_ext/time.rb
109
152
  - lib/avro_turf/core_ext/true_class.rb
153
+ - lib/avro_turf/messaging.rb
154
+ - lib/avro_turf/schema_registry.rb
110
155
  - lib/avro_turf/schema_store.rb
111
156
  - lib/avro_turf/version.rb
112
157
  - perf/address.avsc
@@ -124,6 +169,9 @@ files:
124
169
  - spec/core_ext/symbol_spec.rb
125
170
  - spec/core_ext/time_spec.rb
126
171
  - spec/core_ext/true_class_spec.rb
172
+ - spec/fake_schema_registry_server.rb
173
+ - spec/messaging_spec.rb
174
+ - spec/schema_registry_spec.rb
127
175
  - spec/schema_store_spec.rb
128
176
  - spec/spec_helper.rb
129
177
  homepage: https://github.com/dasch/avro_turf
@@ -163,6 +211,9 @@ test_files:
163
211
  - spec/core_ext/symbol_spec.rb
164
212
  - spec/core_ext/time_spec.rb
165
213
  - spec/core_ext/true_class_spec.rb
214
+ - spec/fake_schema_registry_server.rb
215
+ - spec/messaging_spec.rb
216
+ - spec/schema_registry_spec.rb
166
217
  - spec/schema_store_spec.rb
167
218
  - spec/spec_helper.rb
168
219
  has_rdoc: