avro_turf 0.11.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +5 -1
- data/.github/workflows/stale.yml +19 -0
- data/CHANGELOG.md +23 -1
- data/README.md +60 -0
- data/avro_turf.gemspec +2 -2
- data/lib/avro_turf/cached_confluent_schema_registry.rb +2 -0
- data/lib/avro_turf/confluent_schema_registry.rb +5 -1
- data/lib/avro_turf/disk_cache.rb +32 -9
- data/lib/avro_turf/in_memory_cache.rb +2 -2
- data/lib/avro_turf/messaging.rb +27 -14
- data/lib/avro_turf/schema_store.rb +33 -10
- data/lib/avro_turf/version.rb +1 -1
- data/spec/confluent_schema_registry_spec.rb +12 -0
- data/spec/disk_cached_confluent_schema_registry_spec.rb +71 -1
- data/spec/messaging_spec.rb +102 -0
- data/spec/schema_store_spec.rb +98 -0
- metadata +8 -9
- data/.circleci/config.yml +0 -36
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a4e9638703b961c348d06adcfa3e34ac2bd908e68f9cf7762a550ae4ef453b8a
|
4
|
+
data.tar.gz: 6d53b1f9b5e4b9e5a3aaeb13207bf350e2b56c8db138cb499643180a94a684b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7045f852f20d3ddeca3256724c918ed255722a1556f662d159b805921670d0a2af3e574e3c967bee7622d5d3e05f5348d6023b250006c3651cb7b1f9cec059bf
|
7
|
+
data.tar.gz: 50ca19fb058246ba19b28472e03bedcfbc09ddcd336d46deb29dec4b406d81a65c2b0f6ca84ec36d5227da035906976af133cf757cbfc2f20abda116c8e17d8c
|
data/.github/workflows/ruby.yml
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
name: Ruby
|
2
2
|
|
3
|
-
on: [push]
|
3
|
+
on: [push, pull_request]
|
4
4
|
|
5
5
|
jobs:
|
6
6
|
build:
|
7
7
|
|
8
8
|
runs-on: ubuntu-latest
|
9
|
+
strategy:
|
10
|
+
matrix:
|
11
|
+
ruby-version: [1.8.x, 1.9.x, 2.0.x, 2.1.x, 2.2.x,
|
12
|
+
2.3.x, 2.4.x, 2.5.x, 2.6.x, 2.7.x, 3.0.x]
|
9
13
|
|
10
14
|
steps:
|
11
15
|
- uses: actions/checkout@v1
|
@@ -0,0 +1,19 @@
|
|
1
|
+
name: Mark stale issues and pull requests
|
2
|
+
|
3
|
+
on:
|
4
|
+
schedule:
|
5
|
+
- cron: "0 0 * * *"
|
6
|
+
|
7
|
+
jobs:
|
8
|
+
stale:
|
9
|
+
|
10
|
+
runs-on: ubuntu-latest
|
11
|
+
|
12
|
+
steps:
|
13
|
+
- uses: actions/stale@v1
|
14
|
+
with:
|
15
|
+
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
16
|
+
stale-issue-message: 'Stale issue message'
|
17
|
+
stale-pr-message: 'Stale pull request message'
|
18
|
+
stale-issue-label: 'no-issue-activity'
|
19
|
+
stale-pr-label: 'no-pr-activity'
|
data/CHANGELOG.md
CHANGED
@@ -1,7 +1,29 @@
|
|
1
|
-
#
|
1
|
+
# AvroTurf
|
2
2
|
|
3
3
|
## Unreleased
|
4
4
|
|
5
|
+
## v1.3.1
|
6
|
+
|
7
|
+
- Prevent CachedConfluentSchemaRegistry from caching the 'latest' version (#140)
|
8
|
+
- Fix issue with zero length schema cache file (#138)
|
9
|
+
|
10
|
+
## v1.3.0
|
11
|
+
|
12
|
+
- Add support for plain user/password auth to ConfluentSchemaRegistry (#120)
|
13
|
+
|
14
|
+
## v1.2.0
|
15
|
+
|
16
|
+
- Expose `fetch_schema`, `fetch_schema_by_id` and `register_schema` in the `Messaging` interface (#117, #119)
|
17
|
+
- Add ability to validate message before encoding in `Messaging#encode` interface (#116, #118)
|
18
|
+
|
19
|
+
## v1.1.0
|
20
|
+
|
21
|
+
- Compatibility with Avro v1.10.x.
|
22
|
+
|
23
|
+
## v1.0.0
|
24
|
+
|
25
|
+
- Stop caching nested sub-schemas (#111)
|
26
|
+
|
5
27
|
## v0.11.0
|
6
28
|
|
7
29
|
- Add proxy support (#107)
|
data/README.md
CHANGED
@@ -16,6 +16,48 @@ These classes have been renamed to `AvroTurf::ConfluentSchemaRegistry`,
|
|
16
16
|
|
17
17
|
The aliases for the original names will be removed in a future release.
|
18
18
|
|
19
|
+
## Note about finding nested schemas
|
20
|
+
|
21
|
+
As of AvroTurf version 1.0.0, only top-level schemas that have their own .avsc file will be loaded and resolvable by the `AvroTurf::SchemaStore#find` method. This change will likely not affect most users. However, if you use `AvroTurf::SchemaStore#load_schemas!` to pre-cache all your schemas and then rely on `AvroTurf::SchemaStore#find` to access nested schemas that are not defined by their own .avsc files, your code may stop working when you upgrade to v1.0.0.
|
22
|
+
|
23
|
+
As an example, if you have a `person` schema (defined in `my/schemas/contacts/person.avsc`) that defines a nested `address` schema like this:
|
24
|
+
|
25
|
+
```json
|
26
|
+
{
|
27
|
+
"name": "person",
|
28
|
+
"namespace": "contacts",
|
29
|
+
"type": "record",
|
30
|
+
"fields": [
|
31
|
+
{
|
32
|
+
"name": "address",
|
33
|
+
"type": {
|
34
|
+
"name": "address",
|
35
|
+
"type": "record",
|
36
|
+
"fields": [
|
37
|
+
{ "name": "addr1", "type": "string" },
|
38
|
+
{ "name": "addr2", "type": "string" },
|
39
|
+
{ "name": "city", "type": "string" },
|
40
|
+
{ "name": "zip", "type": "string" }
|
41
|
+
]
|
42
|
+
}
|
43
|
+
}
|
44
|
+
]
|
45
|
+
}
|
46
|
+
```
|
47
|
+
...this will no longer work in v1.0.0:
|
48
|
+
```ruby
|
49
|
+
store = AvroTurf::SchemaStore.new(path: 'my/schemas')
|
50
|
+
store.load_schemas!
|
51
|
+
|
52
|
+
# Accessing 'person' is correct and works fine.
|
53
|
+
person = store.find('person', 'contacts') # my/schemas/contacts/person.avsc exists
|
54
|
+
|
55
|
+
# Trying to access 'address' raises AvroTurf::SchemaNotFoundError
|
56
|
+
address = store.find('address', 'contacts') # my/schemas/contacts/address.avsc is not found
|
57
|
+
```
|
58
|
+
|
59
|
+
For details and context, see [this pull request](https://github.com/dasch/avro_turf/pull/111).
|
60
|
+
|
19
61
|
## Installation
|
20
62
|
|
21
63
|
Add this line to your application's Gemfile:
|
@@ -136,6 +178,10 @@ data = avro.encode({ "title" => "hello, world" }, subject: 'greeting', version:
|
|
136
178
|
# of the same schema version will be served by the cache.
|
137
179
|
data = avro.encode({ "title" => "hello, world" }, schema_id: 2)
|
138
180
|
|
181
|
+
# A message can be validated before encoding to get a description of the problem through
|
182
|
+
# Avro::SchemaValidator::ValidationError exception
|
183
|
+
data = avro.encode({ "titl" => "hello, world" }, schema_name: "greeting", validate: true)
|
184
|
+
|
139
185
|
# When decoding, the schema will be fetched from the registry and cached. Subsequent
|
140
186
|
# instances of the same schema id will be served by the cache.
|
141
187
|
avro.decode(data) #=> { "title" => "hello, world" }
|
@@ -147,6 +193,20 @@ result.message #=> { "title" => "hello, world" }
|
|
147
193
|
result.schema_id #=> 3
|
148
194
|
result.writer_schema #=> #<Avro::Schema: ...>
|
149
195
|
result.reader_schema #=> nil
|
196
|
+
|
197
|
+
# You can also work with schema through this interface:
|
198
|
+
# Fetch latest schema for subject from registry
|
199
|
+
schema, schema_id = avro.fetch_schema(subject: 'greeting')
|
200
|
+
# Fetch specific version
|
201
|
+
schema, schema_id = avro.fetch_schema(subject: 'greeting', version: 1)
|
202
|
+
# Fetch schema by id
|
203
|
+
schema, schema_id = avro.fetch_schema_by_id(3)
|
204
|
+
# Register schema fetched from store by name
|
205
|
+
schema, schema_id = avro.register_schema(schema_name: 'greeting')
|
206
|
+
# Specify namespace (same as schema_name: 'somewhere.greeting')
|
207
|
+
schema, schema_id = avro.register_schema(schema_name: 'greeting', namespace: 'somewhere')
|
208
|
+
# Customize subject under which to register schema
|
209
|
+
schema, schema_id = avro.register_schema(schema_name: 'greeting', namespace: 'somewhere', subject: 'test')
|
150
210
|
```
|
151
211
|
|
152
212
|
### Confluent Schema Registry Client
|
data/avro_turf.gemspec
CHANGED
@@ -17,8 +17,8 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
18
|
spec.require_paths = ["lib"]
|
19
19
|
|
20
|
-
spec.add_dependency "avro", ">= 1.7.7", "< 1.
|
21
|
-
spec.add_dependency "excon", "~> 0.
|
20
|
+
spec.add_dependency "avro", ">= 1.7.7", "< 1.11"
|
21
|
+
spec.add_dependency "excon", "~> 0.71"
|
22
22
|
|
23
23
|
spec.add_development_dependency "bundler", "~> 2.0"
|
24
24
|
spec.add_development_dependency "rake", "~> 13.0"
|
@@ -33,6 +33,8 @@ class AvroTurf::CachedConfluentSchemaRegistry
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def subject_version(subject, version = 'latest')
|
36
|
+
return @upstream.subject_version(subject, version) if version == 'latest'
|
37
|
+
|
36
38
|
@cache.lookup_by_version(subject, version) ||
|
37
39
|
@cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
|
38
40
|
end
|
@@ -7,6 +7,8 @@ class AvroTurf::ConfluentSchemaRegistry
|
|
7
7
|
url,
|
8
8
|
logger: Logger.new($stdout),
|
9
9
|
proxy: nil,
|
10
|
+
user: nil,
|
11
|
+
password: nil,
|
10
12
|
client_cert: nil,
|
11
13
|
client_key: nil,
|
12
14
|
client_key_pass: nil,
|
@@ -17,10 +19,12 @@ class AvroTurf::ConfluentSchemaRegistry
|
|
17
19
|
headers = {
|
18
20
|
"Content-Type" => CONTENT_TYPE
|
19
21
|
}
|
20
|
-
headers[:proxy] = proxy
|
22
|
+
headers[:proxy] = proxy unless proxy.nil?
|
21
23
|
@connection = Excon.new(
|
22
24
|
url,
|
23
25
|
headers: headers,
|
26
|
+
user: user,
|
27
|
+
password: password,
|
24
28
|
client_cert: client_cert,
|
25
29
|
client_key: client_key,
|
26
30
|
client_key_pass: client_key_pass,
|
data/lib/avro_turf/disk_cache.rb
CHANGED
@@ -2,15 +2,19 @@
|
|
2
2
|
# Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
|
3
3
|
class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
4
4
|
|
5
|
-
def initialize(disk_path)
|
5
|
+
def initialize(disk_path, logger: Logger.new($stdout))
|
6
6
|
super()
|
7
7
|
|
8
|
+
@logger = logger
|
9
|
+
|
8
10
|
# load the write-thru cache on startup, if it exists
|
9
11
|
@schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
|
10
|
-
|
12
|
+
hash = read_from_disk_cache(@schemas_by_id_path)
|
13
|
+
@schemas_by_id = hash if hash
|
11
14
|
|
12
15
|
@ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
|
13
|
-
|
16
|
+
hash = read_from_disk_cache(@ids_by_schema_path)
|
17
|
+
@ids_by_schema = hash if hash
|
14
18
|
|
15
19
|
@schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
|
16
20
|
@schemas_by_subject_version = {}
|
@@ -31,12 +35,18 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
31
35
|
return value
|
32
36
|
end
|
33
37
|
|
34
|
-
# override to
|
38
|
+
# override to use a json serializable cache key
|
39
|
+
def lookup_by_schema(subject, schema)
|
40
|
+
key = "#{subject}#{schema}"
|
41
|
+
@ids_by_schema[key]
|
42
|
+
end
|
43
|
+
|
44
|
+
# override to use a json serializable cache key and update the file cache
|
35
45
|
def store_by_schema(subject, schema, id)
|
36
|
-
|
37
|
-
|
46
|
+
key = "#{subject}#{schema}"
|
47
|
+
@ids_by_schema[key] = id
|
38
48
|
File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
|
39
|
-
|
49
|
+
id
|
40
50
|
end
|
41
51
|
|
42
52
|
# checks instance var (in-memory cache) for schema
|
@@ -49,7 +59,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
49
59
|
|
50
60
|
return schema unless schema.nil?
|
51
61
|
|
52
|
-
hash =
|
62
|
+
hash = read_from_disk_cache(@schemas_by_subject_version_path)
|
53
63
|
if hash
|
54
64
|
@schemas_by_subject_version = hash
|
55
65
|
@schemas_by_subject_version[key]
|
@@ -63,7 +73,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
63
73
|
# update instance var (in memory-cache) to match
|
64
74
|
def store_by_version(subject, version, schema)
|
65
75
|
key = "#{subject}#{version}"
|
66
|
-
hash =
|
76
|
+
hash = read_from_disk_cache(@schemas_by_subject_version_path)
|
67
77
|
hash = if hash
|
68
78
|
hash[key] = schema
|
69
79
|
hash
|
@@ -77,6 +87,19 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
77
87
|
@schemas_by_subject_version[key]
|
78
88
|
end
|
79
89
|
|
90
|
+
# Parse the file from disk, if it exists and is not zero length
|
91
|
+
private def read_from_disk_cache(path)
|
92
|
+
if File.exist?(path)
|
93
|
+
if File.size(path)!=0
|
94
|
+
return JSON.parse(File.read(path))
|
95
|
+
else
|
96
|
+
# just log a message if skipping zero length file
|
97
|
+
@logger.warn "skipping JSON.parse of zero length file at #{path}"
|
98
|
+
end
|
99
|
+
end
|
100
|
+
return nil
|
101
|
+
end
|
102
|
+
|
80
103
|
private def write_to_disk_cache(path, hash)
|
81
104
|
File.write(path, JSON.pretty_generate(hash))
|
82
105
|
end
|
@@ -17,12 +17,12 @@ class AvroTurf::InMemoryCache
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def lookup_by_schema(subject, schema)
|
20
|
-
key = subject
|
20
|
+
key = [subject, schema.to_s]
|
21
21
|
@ids_by_schema[key]
|
22
22
|
end
|
23
23
|
|
24
24
|
def store_by_schema(subject, schema, id)
|
25
|
-
key = subject
|
25
|
+
key = [subject, schema.to_s]
|
26
26
|
@ids_by_schema[key] = id
|
27
27
|
end
|
28
28
|
|
data/lib/avro_turf/messaging.rb
CHANGED
@@ -34,6 +34,8 @@ class AvroTurf
|
|
34
34
|
# namespace - The String default schema namespace.
|
35
35
|
# logger - The Logger that should be used to log information (optional).
|
36
36
|
# proxy - Forward the request via proxy (optional).
|
37
|
+
# user - User for basic auth (optional).
|
38
|
+
# password - Password for basic auth (optional).
|
37
39
|
# client_cert - Name of file containing client certificate (optional).
|
38
40
|
# client_key - Name of file containing client private key to go with client_cert (optional).
|
39
41
|
# client_key_pass - Password to go with client_key (optional).
|
@@ -47,6 +49,8 @@ class AvroTurf
|
|
47
49
|
namespace: nil,
|
48
50
|
logger: nil,
|
49
51
|
proxy: nil,
|
52
|
+
user: nil,
|
53
|
+
password: nil,
|
50
54
|
client_cert: nil,
|
51
55
|
client_key: nil,
|
52
56
|
client_key_pass: nil,
|
@@ -61,6 +65,8 @@ class AvroTurf
|
|
61
65
|
registry_url,
|
62
66
|
logger: @logger,
|
63
67
|
proxy: proxy,
|
68
|
+
user: user,
|
69
|
+
password: password,
|
64
70
|
client_cert: client_cert,
|
65
71
|
client_key: client_key,
|
66
72
|
client_key_pass: client_key_pass,
|
@@ -84,19 +90,26 @@ class AvroTurf
|
|
84
90
|
# the data. Must match the schema used when encoding (optional).
|
85
91
|
# schema_id - The integer id of the schema that should be used to encode
|
86
92
|
# the data.
|
93
|
+
# validate - The boolean for performing complete message validation before
|
94
|
+
# encoding it, Avro::SchemaValidator::ValidationError with
|
95
|
+
# a descriptive message will be raised in case of invalid message.
|
87
96
|
#
|
88
97
|
# Returns the encoded data as a String.
|
89
|
-
def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil)
|
90
|
-
|
98
|
+
def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil, validate: false)
|
99
|
+
schema, schema_id = if schema_id
|
91
100
|
fetch_schema_by_id(schema_id)
|
92
101
|
elsif subject && version
|
93
|
-
fetch_schema(subject, version)
|
102
|
+
fetch_schema(subject: subject, version: version)
|
94
103
|
elsif schema_name
|
95
|
-
register_schema(subject, schema_name, namespace)
|
104
|
+
register_schema(subject: subject, schema_name: schema_name, namespace: namespace)
|
96
105
|
else
|
97
106
|
raise ArgumentError.new('Neither schema_name nor schema_id nor subject + version provided to determine the schema.')
|
98
107
|
end
|
99
108
|
|
109
|
+
if validate
|
110
|
+
Avro::SchemaValidator.validate!(schema, message, recursive: true, encoded: false, fail_on_extra_fields: true)
|
111
|
+
end
|
112
|
+
|
100
113
|
stream = StringIO.new
|
101
114
|
writer = Avro::IO::DatumWriter.new(schema)
|
102
115
|
encoder = Avro::IO::BinaryEncoder.new(stream)
|
@@ -111,7 +124,7 @@ class AvroTurf
|
|
111
124
|
writer.write(message, encoder)
|
112
125
|
|
113
126
|
stream.string
|
114
|
-
rescue Excon::
|
127
|
+
rescue Excon::Errors::NotFound
|
115
128
|
if schema_id
|
116
129
|
raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
|
117
130
|
else
|
@@ -169,31 +182,31 @@ class AvroTurf
|
|
169
182
|
raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
|
170
183
|
end
|
171
184
|
|
172
|
-
private
|
173
|
-
|
174
185
|
# Providing subject and version to determine the schema,
|
175
186
|
# which skips the auto registeration of schema on the schema registry.
|
176
187
|
# Fetch the schema from registry with the provided subject name and version.
|
177
|
-
def fetch_schema(subject
|
188
|
+
def fetch_schema(subject:, version: 'latest')
|
178
189
|
schema_data = @registry.subject_version(subject, version)
|
179
190
|
schema_id = schema_data.fetch('id')
|
180
191
|
schema = Avro::Schema.parse(schema_data.fetch('schema'))
|
181
|
-
[
|
192
|
+
[schema, schema_id]
|
182
193
|
end
|
183
194
|
|
184
195
|
# Fetch the schema from registry with the provided schema_id.
|
185
196
|
def fetch_schema_by_id(schema_id)
|
186
|
-
|
187
|
-
|
188
|
-
|
197
|
+
schema = @schemas_by_id.fetch(schema_id) do
|
198
|
+
schema_json = @registry.fetch(schema_id)
|
199
|
+
Avro::Schema.parse(schema_json)
|
200
|
+
end
|
201
|
+
[schema, schema_id]
|
189
202
|
end
|
190
203
|
|
191
204
|
# Schemas are registered under the full name of the top level Avro record
|
192
205
|
# type, or `subject` if it's provided.
|
193
|
-
def register_schema(subject
|
206
|
+
def register_schema(schema_name:, subject: nil, namespace: nil)
|
194
207
|
schema = @schema_store.find(schema_name, namespace)
|
195
208
|
schema_id = @registry.register(subject || schema.fullname, schema)
|
196
|
-
[
|
209
|
+
[schema, schema_id]
|
197
210
|
end
|
198
211
|
end
|
199
212
|
end
|
@@ -22,7 +22,7 @@ class AvroTurf::SchemaStore
|
|
22
22
|
# Still need to check whether the schema is already loaded
|
23
23
|
return @schemas[fullname] if @schemas.key?(fullname)
|
24
24
|
|
25
|
-
load_schema!(fullname
|
25
|
+
load_schema!(fullname)
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
@@ -42,34 +42,57 @@ class AvroTurf::SchemaStore
|
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
|
-
|
45
|
+
protected
|
46
46
|
|
47
47
|
# Loads single schema
|
48
48
|
# This method is not thread-safe; do not call it outside of a mutex synchronization routine
|
49
|
-
def load_schema!(fullname,
|
50
|
-
|
51
|
-
schema_path = File.join(@path, *namespace, schema_name + ".avsc")
|
49
|
+
def load_schema!(fullname, local_schemas_cache = {})
|
50
|
+
schema_path = build_schema_path(fullname)
|
52
51
|
schema_json = JSON.parse(File.read(schema_path))
|
53
|
-
schema = Avro::Schema.real_parse(schema_json, @schemas)
|
54
52
|
|
53
|
+
schema = Avro::Schema.real_parse(schema_json, local_schemas_cache)
|
54
|
+
|
55
|
+
# Don't cache the parsed schema until after its fullname is validated
|
55
56
|
if schema.respond_to?(:fullname) && schema.fullname != fullname
|
56
57
|
raise AvroTurf::SchemaError, "expected schema `#{schema_path}' to define type `#{fullname}'"
|
57
58
|
end
|
58
59
|
|
60
|
+
# Cache only this new top-level schema by its fullname. It's critical
|
61
|
+
# not to make every sub-schema resolvable at the top level here because
|
62
|
+
# multiple different avsc files may define the same sub-schema, and
|
63
|
+
# if we share the @schemas cache across all parsing contexts, the Avro
|
64
|
+
# gem will raise an Avro::SchemaParseError when parsing another avsc
|
65
|
+
# file that contains a subschema with the same fullname as one
|
66
|
+
# encountered previously in a different file:
|
67
|
+
# <Avro::SchemaParseError: The name "foo.bar" is already in use.>
|
68
|
+
# Essentially, the only schemas that should be resolvable in @schemas
|
69
|
+
# are those that have their own .avsc files on disk.
|
70
|
+
@schemas[fullname] = schema
|
71
|
+
|
59
72
|
schema
|
60
73
|
rescue ::Avro::SchemaParseError => e
|
61
74
|
# This is a hack in order to figure out exactly which type was missing. The
|
62
75
|
# Avro gem ought to provide this data directly.
|
63
76
|
if e.to_s =~ /"([\w\.]+)" is not a schema we know about/
|
64
|
-
|
77
|
+
# Try to first resolve a referenced schema from disk.
|
78
|
+
# If this is successful, the Avro gem will have mutated the
|
79
|
+
# local_schemas_cache, adding all the new schemas it found.
|
80
|
+
load_schema!($1, local_schemas_cache)
|
65
81
|
|
66
|
-
#
|
67
|
-
|
68
|
-
|
82
|
+
# Attempt to re-parse the original schema now that the dependency
|
83
|
+
# has been resolved and use the now-updated local_schemas_cache to
|
84
|
+
# pick up where we left off.
|
85
|
+
local_schemas_cache.delete(fullname)
|
86
|
+
load_schema!(fullname, local_schemas_cache)
|
69
87
|
else
|
70
88
|
raise
|
71
89
|
end
|
72
90
|
rescue Errno::ENOENT, Errno::ENAMETOOLONG
|
73
91
|
raise AvroTurf::SchemaNotFoundError, "could not find Avro schema at `#{schema_path}'"
|
74
92
|
end
|
93
|
+
|
94
|
+
def build_schema_path(fullname)
|
95
|
+
*namespace, schema_name = fullname.split(".")
|
96
|
+
schema_path = File.join(@path, *namespace, schema_name + ".avsc")
|
97
|
+
end
|
75
98
|
end
|
data/lib/avro_turf/version.rb
CHANGED
@@ -3,6 +3,8 @@ require 'avro_turf/confluent_schema_registry'
|
|
3
3
|
require 'avro_turf/test/fake_confluent_schema_registry_server'
|
4
4
|
|
5
5
|
describe AvroTurf::ConfluentSchemaRegistry do
|
6
|
+
let(:user) { "abc" }
|
7
|
+
let(:password) { "xxyyzz" }
|
6
8
|
let(:client_cert) { "test client cert" }
|
7
9
|
let(:client_key) { "test client key" }
|
8
10
|
let(:client_key_pass) { "test client key password" }
|
@@ -18,4 +20,14 @@ describe AvroTurf::ConfluentSchemaRegistry do
|
|
18
20
|
)
|
19
21
|
}
|
20
22
|
end
|
23
|
+
|
24
|
+
it_behaves_like "a confluent schema registry client" do
|
25
|
+
let(:registry) {
|
26
|
+
described_class.new(
|
27
|
+
registry_url,
|
28
|
+
user: user,
|
29
|
+
password: password,
|
30
|
+
)
|
31
|
+
}
|
32
|
+
end
|
21
33
|
end
|
@@ -4,7 +4,8 @@ require 'avro_turf/test/fake_confluent_schema_registry_server'
|
|
4
4
|
|
5
5
|
describe AvroTurf::CachedConfluentSchemaRegistry do
|
6
6
|
let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
|
7
|
-
let(:
|
7
|
+
let(:logger_io) { StringIO.new }
|
8
|
+
let(:cache) { AvroTurf::DiskCache.new("spec/cache", logger: Logger.new(logger_io))}
|
8
9
|
let(:registry) { described_class.new(upstream, cache: cache) }
|
9
10
|
let(:id) { rand(999) }
|
10
11
|
let(:schema) do
|
@@ -80,6 +81,40 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
|
|
80
81
|
end
|
81
82
|
end
|
82
83
|
|
84
|
+
describe "#fetch (zero length cache file)" do
|
85
|
+
let(:cache_after) do
|
86
|
+
{
|
87
|
+
"#{id}" => "#{schema}"
|
88
|
+
}
|
89
|
+
end
|
90
|
+
|
91
|
+
before do
|
92
|
+
# setup the disk cache with a zero length file
|
93
|
+
File.write(File.join("spec/cache", "schemas_by_id.json"), '')
|
94
|
+
end
|
95
|
+
|
96
|
+
it "skips zero length disk cache" do
|
97
|
+
# multiple calls return same result, with only one upstream call
|
98
|
+
allow(upstream).to receive(:fetch).with(id).and_return(schema)
|
99
|
+
expect(registry.fetch(id)).to eq(schema)
|
100
|
+
expect(registry.fetch(id)).to eq(schema)
|
101
|
+
expect(upstream).to have_received(:fetch).exactly(1).times
|
102
|
+
expect(load_cache("schemas_by_id.json")).to eq cache_after
|
103
|
+
expect(logger_io.string).to include("zero length file at spec/cache/schemas_by_id.json")
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
describe "#fetch (corrupt cache file)" do
|
108
|
+
before do
|
109
|
+
# setup the disk cache with a corrupt file (i.e. not json)
|
110
|
+
File.write(File.join("spec/cache", "schemas_by_id.json"), 'NOTJSON')
|
111
|
+
end
|
112
|
+
|
113
|
+
it "raises error on corrupt cache file" do
|
114
|
+
expect{registry.fetch(id)}.to raise_error(JSON::ParserError, /unexpected token/)
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
83
118
|
describe "#register" do
|
84
119
|
let(:subject_name) { "a_subject" }
|
85
120
|
let(:cache_before) do
|
@@ -120,6 +155,41 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
|
|
120
155
|
end
|
121
156
|
end
|
122
157
|
|
158
|
+
describe "#register (zero length cache file)" do
|
159
|
+
let(:subject_name) { "a_subject" }
|
160
|
+
let(:cache_after) do
|
161
|
+
{
|
162
|
+
"#{subject_name}#{schema}" => id
|
163
|
+
}
|
164
|
+
end
|
165
|
+
|
166
|
+
before do
|
167
|
+
# setup the disk cache with a zero length file
|
168
|
+
File.write(File.join("spec/cache", "ids_by_schema.json"), '')
|
169
|
+
end
|
170
|
+
|
171
|
+
it "skips zero length disk cache" do
|
172
|
+
# multiple calls return same result, with only one upstream call
|
173
|
+
allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
|
174
|
+
expect(registry.register(subject_name, schema)).to eq(id)
|
175
|
+
expect(registry.register(subject_name, schema)).to eq(id)
|
176
|
+
expect(upstream).to have_received(:register).exactly(1).times
|
177
|
+
expect(load_cache("ids_by_schema.json")).to eq cache_after
|
178
|
+
expect(logger_io.string).to include("zero length file at spec/cache/ids_by_schema.json")
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
describe "#register (corrupt cache file)" do
|
183
|
+
before do
|
184
|
+
# setup the disk cache with a corrupt file (i.e. not json)
|
185
|
+
File.write(File.join("spec/cache", "ids_by_schema.json"), 'NOTJSON')
|
186
|
+
end
|
187
|
+
|
188
|
+
it "raises error on corrupt cache file" do
|
189
|
+
expect{registry.register(subject_name, schema)}.to raise_error(JSON::ParserError, /unexpected token/)
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
123
193
|
describe "#subject_version" do
|
124
194
|
it "writes thru to disk cache" do
|
125
195
|
# multiple calls return same result, with zero upstream calls
|
data/spec/messaging_spec.rb
CHANGED
@@ -297,4 +297,106 @@ describe AvroTurf::Messaging do
|
|
297
297
|
end
|
298
298
|
end
|
299
299
|
end
|
300
|
+
|
301
|
+
context "validating" do
|
302
|
+
subject(:encode){ avro.encode(message, schema_name: "person", validate: true) }
|
303
|
+
|
304
|
+
context "for correct message" do
|
305
|
+
it { expect { encode }.not_to raise_error }
|
306
|
+
end
|
307
|
+
|
308
|
+
context "when message has wrong type" do
|
309
|
+
let(:message) { { "full_name" => 123 } }
|
310
|
+
|
311
|
+
it { expect { encode }.to raise_error(Avro::SchemaValidator::ValidationError, /\.full_name expected type string, got int/) }
|
312
|
+
end
|
313
|
+
|
314
|
+
context "when message contains extra fields (typo in key)" do
|
315
|
+
let(:message) { { "fulll_name" => "John Doe" } }
|
316
|
+
|
317
|
+
it { expect { encode }.to raise_error(Avro::SchemaValidator::ValidationError, /extra field 'fulll_name'/) }
|
318
|
+
end
|
319
|
+
end
|
320
|
+
|
321
|
+
context 'fetching and registering schema' do
|
322
|
+
let(:schema_store) { AvroTurf::SchemaStore.new(path: "spec/schemas") }
|
323
|
+
|
324
|
+
let(:registry) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
|
325
|
+
|
326
|
+
let(:avro) do
|
327
|
+
AvroTurf::Messaging.new(
|
328
|
+
registry: registry,
|
329
|
+
schema_store: schema_store,
|
330
|
+
logger: logger
|
331
|
+
)
|
332
|
+
end
|
333
|
+
|
334
|
+
let(:schema_id) { 234 }
|
335
|
+
|
336
|
+
context 'using fetch_schema' do
|
337
|
+
subject { avro.fetch_schema(subject: subj, version: version) }
|
338
|
+
|
339
|
+
let(:subj) { 'subject' }
|
340
|
+
|
341
|
+
let(:version) { 'version' }
|
342
|
+
|
343
|
+
let(:response) { {'id' => schema_id, 'schema' => schema_json} }
|
344
|
+
|
345
|
+
before do
|
346
|
+
allow(registry).to receive(:subject_version).with(subj, version).and_return(response)
|
347
|
+
end
|
348
|
+
|
349
|
+
it 'gets schema from registry' do
|
350
|
+
expect(subject).to eq([schema, schema_id])
|
351
|
+
end
|
352
|
+
end
|
353
|
+
|
354
|
+
context 'using fetch_schema_by_id' do
|
355
|
+
subject { avro.fetch_schema_by_id(schema_id) }
|
356
|
+
|
357
|
+
before do
|
358
|
+
allow(registry).to receive(:fetch).with(schema_id).and_return(schema_json)
|
359
|
+
end
|
360
|
+
|
361
|
+
it 'gets schema from registry' do
|
362
|
+
expect(subject).to eq([schema, schema_id])
|
363
|
+
end
|
364
|
+
end
|
365
|
+
|
366
|
+
context 'using register_schema' do
|
367
|
+
let(:schema_name) { 'schema_name' }
|
368
|
+
|
369
|
+
let(:namespace) { 'namespace' }
|
370
|
+
|
371
|
+
before do
|
372
|
+
allow(schema_store).to receive(:find).with(schema_name, namespace).and_return(schema)
|
373
|
+
end
|
374
|
+
|
375
|
+
context 'when subject is not set' do
|
376
|
+
subject { avro.register_schema(schema_name: schema_name, namespace: namespace) }
|
377
|
+
|
378
|
+
before do
|
379
|
+
allow(registry).to receive(:register).with(schema.fullname, schema).and_return(schema_id)
|
380
|
+
end
|
381
|
+
|
382
|
+
it 'registers schema in registry' do
|
383
|
+
expect(subject).to eq([schema, schema_id])
|
384
|
+
end
|
385
|
+
end
|
386
|
+
|
387
|
+
context 'when subject is set' do
|
388
|
+
subject { avro.register_schema(schema_name: schema_name, namespace: namespace, subject: subj) }
|
389
|
+
|
390
|
+
let(:subj) { 'subject' }
|
391
|
+
|
392
|
+
before do
|
393
|
+
allow(registry).to receive(:register).with(subj, schema).and_return(schema_id)
|
394
|
+
end
|
395
|
+
|
396
|
+
it 'registers schema in registry' do
|
397
|
+
expect(subject).to eq([schema, schema_id])
|
398
|
+
end
|
399
|
+
end
|
400
|
+
end
|
401
|
+
end
|
300
402
|
end
|
data/spec/schema_store_spec.rb
CHANGED
@@ -198,6 +198,104 @@ describe AvroTurf::SchemaStore do
|
|
198
198
|
expect(schema.fullname).to eq "person"
|
199
199
|
end
|
200
200
|
|
201
|
+
# This test would fail under avro_turf <= v0.11.0
|
202
|
+
it "does NOT cache *nested* schemas in memory" do
|
203
|
+
FileUtils.mkdir_p("spec/schemas/test")
|
204
|
+
|
205
|
+
define_schema "test/person.avsc", <<-AVSC
|
206
|
+
{
|
207
|
+
"name": "person",
|
208
|
+
"namespace": "test",
|
209
|
+
"type": "record",
|
210
|
+
"fields": [
|
211
|
+
{
|
212
|
+
"name": "address",
|
213
|
+
"type": {
|
214
|
+
"name": "address",
|
215
|
+
"type": "record",
|
216
|
+
"fields": [
|
217
|
+
{ "name": "addr1", "type": "string" },
|
218
|
+
{ "name": "addr2", "type": "string" },
|
219
|
+
{ "name": "city", "type": "string" },
|
220
|
+
{ "name": "zip", "type": "string" }
|
221
|
+
]
|
222
|
+
}
|
223
|
+
}
|
224
|
+
]
|
225
|
+
}
|
226
|
+
AVSC
|
227
|
+
|
228
|
+
schema = store.find('person', 'test')
|
229
|
+
expect(schema.fullname).to eq "test.person"
|
230
|
+
|
231
|
+
expect { store.find('address', 'test') }.
|
232
|
+
to raise_error(AvroTurf::SchemaNotFoundError)
|
233
|
+
end
|
234
|
+
|
235
|
+
# This test would fail under avro_turf <= v0.11.0
|
236
|
+
it "allows two different avsc files to define nested sub-schemas with the same fullname" do
|
237
|
+
FileUtils.mkdir_p("spec/schemas/test")
|
238
|
+
|
239
|
+
define_schema "test/person.avsc", <<-AVSC
|
240
|
+
{
|
241
|
+
"name": "person",
|
242
|
+
"namespace": "test",
|
243
|
+
"type": "record",
|
244
|
+
"fields": [
|
245
|
+
{
|
246
|
+
"name": "location",
|
247
|
+
"type": {
|
248
|
+
"name": "location",
|
249
|
+
"type": "record",
|
250
|
+
"fields": [
|
251
|
+
{ "name": "city", "type": "string" },
|
252
|
+
{ "name": "zipcode", "type": "string" }
|
253
|
+
]
|
254
|
+
}
|
255
|
+
}
|
256
|
+
]
|
257
|
+
}
|
258
|
+
AVSC
|
259
|
+
|
260
|
+
define_schema "test/company.avsc", <<-AVSC
|
261
|
+
{
|
262
|
+
"name": "company",
|
263
|
+
"namespace": "test",
|
264
|
+
"type": "record",
|
265
|
+
"fields": [
|
266
|
+
{
|
267
|
+
"name": "headquarters",
|
268
|
+
"type": {
|
269
|
+
"name": "location",
|
270
|
+
"type": "record",
|
271
|
+
"fields": [
|
272
|
+
{ "name": "city", "type": "string" },
|
273
|
+
{ "name": "postcode", "type": "string" }
|
274
|
+
]
|
275
|
+
}
|
276
|
+
}
|
277
|
+
]
|
278
|
+
}
|
279
|
+
AVSC
|
280
|
+
|
281
|
+
company = nil
|
282
|
+
person = store.find('person', 'test')
|
283
|
+
|
284
|
+
# This should *NOT* raise the error:
|
285
|
+
# #<Avro::SchemaParseError: The name "test.location" is already in use.>
|
286
|
+
expect { company = store.find('company', 'test') }.not_to raise_error
|
287
|
+
|
288
|
+
person_location_field = person.fields_hash['location']
|
289
|
+
expect(person_location_field.type.name).to eq('location')
|
290
|
+
expect(person_location_field.type.fields_hash).to include('zipcode')
|
291
|
+
expect(person_location_field.type.fields_hash).not_to include('postcode')
|
292
|
+
|
293
|
+
company_headquarters_field = company.fields_hash['headquarters']
|
294
|
+
expect(company_headquarters_field.type.name).to eq('location')
|
295
|
+
expect(company_headquarters_field.type.fields_hash).to include('postcode')
|
296
|
+
expect(company_headquarters_field.type.fields_hash).not_to include('zipcode')
|
297
|
+
end
|
298
|
+
|
201
299
|
it "is thread safe" do
|
202
300
|
define_schema "address.avsc", <<-AVSC
|
203
301
|
{
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: avro_turf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Schierbeck
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: avro
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: 1.7.7
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '1.
|
22
|
+
version: '1.11'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,21 +29,21 @@ dependencies:
|
|
29
29
|
version: 1.7.7
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '1.
|
32
|
+
version: '1.11'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: excon
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
36
36
|
requirements:
|
37
37
|
- - "~>"
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version: '0.
|
39
|
+
version: '0.71'
|
40
40
|
type: :runtime
|
41
41
|
prerelease: false
|
42
42
|
version_requirements: !ruby/object:Gem::Requirement
|
43
43
|
requirements:
|
44
44
|
- - "~>"
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: '0.
|
46
|
+
version: '0.71'
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: bundler
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -163,8 +163,8 @@ executables: []
|
|
163
163
|
extensions: []
|
164
164
|
extra_rdoc_files: []
|
165
165
|
files:
|
166
|
-
- ".circleci/config.yml"
|
167
166
|
- ".github/workflows/ruby.yml"
|
167
|
+
- ".github/workflows/stale.yml"
|
168
168
|
- ".gitignore"
|
169
169
|
- ".rspec"
|
170
170
|
- CHANGELOG.md
|
@@ -249,8 +249,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
249
249
|
- !ruby/object:Gem::Version
|
250
250
|
version: '0'
|
251
251
|
requirements: []
|
252
|
-
|
253
|
-
rubygems_version: 2.7.6
|
252
|
+
rubygems_version: 3.1.2
|
254
253
|
signing_key:
|
255
254
|
specification_version: 4
|
256
255
|
summary: A library that makes it easier to use the Avro serialization format from
|
data/.circleci/config.yml
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
version: 2
|
2
|
-
jobs:
|
3
|
-
build:
|
4
|
-
environment:
|
5
|
-
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
|
6
|
-
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
|
7
|
-
docker:
|
8
|
-
- image: circleci/ruby:2.6.2
|
9
|
-
steps:
|
10
|
-
- checkout
|
11
|
-
- run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
|
12
|
-
- restore_cache:
|
13
|
-
keys:
|
14
|
-
# This branch if available
|
15
|
-
- v1-dep-{{ .Branch }}-
|
16
|
-
# Default branch if not
|
17
|
-
- v1-dep-master-
|
18
|
-
# Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
|
19
|
-
- v1-dep-
|
20
|
-
- run: gem install bundler --no-document
|
21
|
-
- run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
|
22
|
-
# Save dependency cache
|
23
|
-
- save_cache:
|
24
|
-
key: v1-dep-{{ .Branch }}-{{ epoch }}
|
25
|
-
paths:
|
26
|
-
- vendor/bundle
|
27
|
-
- ~/.bundle
|
28
|
-
- run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
|
29
|
-
- run:
|
30
|
-
command: bundle exec rspec --color --require spec_helper --format progress
|
31
|
-
- store_test_results:
|
32
|
-
path: /tmp/circleci-test-results
|
33
|
-
- store_artifacts:
|
34
|
-
path: /tmp/circleci-artifacts
|
35
|
-
- store_artifacts:
|
36
|
-
path: /tmp/circleci-test-results
|