avro_turf 0.11.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +5 -1
- data/.github/workflows/stale.yml +19 -0
- data/CHANGELOG.md +23 -1
- data/README.md +60 -0
- data/avro_turf.gemspec +2 -2
- data/lib/avro_turf/cached_confluent_schema_registry.rb +2 -0
- data/lib/avro_turf/confluent_schema_registry.rb +5 -1
- data/lib/avro_turf/disk_cache.rb +32 -9
- data/lib/avro_turf/in_memory_cache.rb +2 -2
- data/lib/avro_turf/messaging.rb +27 -14
- data/lib/avro_turf/schema_store.rb +33 -10
- data/lib/avro_turf/version.rb +1 -1
- data/spec/confluent_schema_registry_spec.rb +12 -0
- data/spec/disk_cached_confluent_schema_registry_spec.rb +71 -1
- data/spec/messaging_spec.rb +102 -0
- data/spec/schema_store_spec.rb +98 -0
- metadata +8 -9
- data/.circleci/config.yml +0 -36
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a4e9638703b961c348d06adcfa3e34ac2bd908e68f9cf7762a550ae4ef453b8a
|
4
|
+
data.tar.gz: 6d53b1f9b5e4b9e5a3aaeb13207bf350e2b56c8db138cb499643180a94a684b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7045f852f20d3ddeca3256724c918ed255722a1556f662d159b805921670d0a2af3e574e3c967bee7622d5d3e05f5348d6023b250006c3651cb7b1f9cec059bf
|
7
|
+
data.tar.gz: 50ca19fb058246ba19b28472e03bedcfbc09ddcd336d46deb29dec4b406d81a65c2b0f6ca84ec36d5227da035906976af133cf757cbfc2f20abda116c8e17d8c
|
data/.github/workflows/ruby.yml
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
name: Ruby
|
2
2
|
|
3
|
-
on: [push]
|
3
|
+
on: [push, pull_request]
|
4
4
|
|
5
5
|
jobs:
|
6
6
|
build:
|
7
7
|
|
8
8
|
runs-on: ubuntu-latest
|
9
|
+
strategy:
|
10
|
+
matrix:
|
11
|
+
ruby-version: [1.8.x, 1.9.x, 2.0.x, 2.1.x, 2.2.x,
|
12
|
+
2.3.x, 2.4.x, 2.5.x, 2.6.x, 2.7.x, 3.0.x]
|
9
13
|
|
10
14
|
steps:
|
11
15
|
- uses: actions/checkout@v1
|
@@ -0,0 +1,19 @@
|
|
1
|
+
name: Mark stale issues and pull requests
|
2
|
+
|
3
|
+
on:
|
4
|
+
schedule:
|
5
|
+
- cron: "0 0 * * *"
|
6
|
+
|
7
|
+
jobs:
|
8
|
+
stale:
|
9
|
+
|
10
|
+
runs-on: ubuntu-latest
|
11
|
+
|
12
|
+
steps:
|
13
|
+
- uses: actions/stale@v1
|
14
|
+
with:
|
15
|
+
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
16
|
+
stale-issue-message: 'Stale issue message'
|
17
|
+
stale-pr-message: 'Stale pull request message'
|
18
|
+
stale-issue-label: 'no-issue-activity'
|
19
|
+
stale-pr-label: 'no-pr-activity'
|
data/CHANGELOG.md
CHANGED
@@ -1,7 +1,29 @@
|
|
1
|
-
#
|
1
|
+
# AvroTurf
|
2
2
|
|
3
3
|
## Unreleased
|
4
4
|
|
5
|
+
## v1.3.1
|
6
|
+
|
7
|
+
- Prevent CachedConfluentSchemaRegistry from caching the 'latest' version (#140)
|
8
|
+
- Fix issue with zero length schema cache file (#138)
|
9
|
+
|
10
|
+
## v1.3.0
|
11
|
+
|
12
|
+
- Add support for plain user/password auth to ConfluentSchemaRegistry (#120)
|
13
|
+
|
14
|
+
## v1.2.0
|
15
|
+
|
16
|
+
- Expose the `fetch_schema`, `fetch_schema_by_id` and `register_schema` methods in the `Messaging` interface (#117, #119)
|
17
|
+
- Add ability to validate message before encoding in `Messaging#encode` interface (#116, #118)
|
18
|
+
|
19
|
+
## v1.1.0
|
20
|
+
|
21
|
+
- Compatibility with Avro v1.10.x.
|
22
|
+
|
23
|
+
## v1.0.0
|
24
|
+
|
25
|
+
- Stop caching nested sub-schemas (#111)
|
26
|
+
|
5
27
|
## v0.11.0
|
6
28
|
|
7
29
|
- Add proxy support (#107)
|
data/README.md
CHANGED
@@ -16,6 +16,48 @@ These classes have been renamed to `AvroTurf::ConfluentSchemaRegistry`,
|
|
16
16
|
|
17
17
|
The aliases for the original names will be removed in a future release.
|
18
18
|
|
19
|
+
## Note about finding nested schemas
|
20
|
+
|
21
|
+
As of AvroTurf version 1.0.0, only top-level schemas that have their own .avsc file will be loaded and resolvable by the `AvroTurf::SchemaStore#find` method. This change will likely not affect most users. However, if you use `AvroTurf::SchemaStore#load_schemas!` to pre-cache all your schemas and then rely on `AvroTurf::SchemaStore#find` to access nested schemas that are not defined by their own .avsc files, your code may stop working when you upgrade to v1.0.0.
|
22
|
+
|
23
|
+
As an example, if you have a `person` schema (defined in `my/schemas/contacts/person.avsc`) that defines a nested `address` schema like this:
|
24
|
+
|
25
|
+
```json
|
26
|
+
{
|
27
|
+
"name": "person",
|
28
|
+
"namespace": "contacts",
|
29
|
+
"type": "record",
|
30
|
+
"fields": [
|
31
|
+
{
|
32
|
+
"name": "address",
|
33
|
+
"type": {
|
34
|
+
"name": "address",
|
35
|
+
"type": "record",
|
36
|
+
"fields": [
|
37
|
+
{ "name": "addr1", "type": "string" },
|
38
|
+
{ "name": "addr2", "type": "string" },
|
39
|
+
{ "name": "city", "type": "string" },
|
40
|
+
{ "name": "zip", "type": "string" }
|
41
|
+
]
|
42
|
+
}
|
43
|
+
}
|
44
|
+
]
|
45
|
+
}
|
46
|
+
```
|
47
|
+
...this will no longer work in v1.0.0:
|
48
|
+
```ruby
|
49
|
+
store = AvroTurf::SchemaStore.new(path: 'my/schemas')
|
50
|
+
store.load_schemas!
|
51
|
+
|
52
|
+
# Accessing 'person' is correct and works fine.
|
53
|
+
person = store.find('person', 'contacts') # my/schemas/contacts/person.avsc exists
|
54
|
+
|
55
|
+
# Trying to access 'address' raises AvroTurf::SchemaNotFoundError
|
56
|
+
address = store.find('address', 'contacts') # my/schemas/contacts/address.avsc is not found
|
57
|
+
```
|
58
|
+
|
59
|
+
For details and context, see [this pull request](https://github.com/dasch/avro_turf/pull/111).
|
60
|
+
|
19
61
|
## Installation
|
20
62
|
|
21
63
|
Add this line to your application's Gemfile:
|
@@ -136,6 +178,10 @@ data = avro.encode({ "title" => "hello, world" }, subject: 'greeting', version:
|
|
136
178
|
# of the same schema version will be served by the cache.
|
137
179
|
data = avro.encode({ "title" => "hello, world" }, schema_id: 2)
|
138
180
|
|
181
|
+
# The message can be validated before encoding to get a description of the problem through
|
182
|
+
# an Avro::SchemaValidator::ValidationError exception
|
183
|
+
data = avro.encode({ "titl" => "hello, world" }, schema_name: "greeting", validate: true)
|
184
|
+
|
139
185
|
# When decoding, the schema will be fetched from the registry and cached. Subsequent
|
140
186
|
# instances of the same schema id will be served by the cache.
|
141
187
|
avro.decode(data) #=> { "title" => "hello, world" }
|
@@ -147,6 +193,20 @@ result.message #=> { "title" => "hello, world" }
|
|
147
193
|
result.schema_id #=> 3
|
148
194
|
result.writer_schema #=> #<Avro::Schema: ...>
|
149
195
|
result.reader_schema #=> nil
|
196
|
+
|
197
|
+
# You can also work with schema through this interface:
|
198
|
+
# Fetch latest schema for subject from registry
|
199
|
+
schema, schema_id = avro.fetch_schema(subject: 'greeting')
|
200
|
+
# Fetch specific version
|
201
|
+
schema, schema_id = avro.fetch_schema(subject: 'greeting', version: 1)
|
202
|
+
# Fetch schema by id
|
203
|
+
schema, schema_id = avro.fetch_schema_by_id(3)
|
204
|
+
# Register schema fetched from store by name
|
205
|
+
schema, schema_id = avro.register_schema(schema_name: 'greeting')
|
206
|
+
# Specify namespace (same as schema_name: 'somewhere.greeting')
|
207
|
+
schema, schema_id = avro.register_schema(schema_name: 'greeting', namespace: 'somewhere')
|
208
|
+
# Customize subject under which to register schema
|
209
|
+
schema, schema_id = avro.register_schema(schema_name: 'greeting', namespace: 'somewhere', subject: 'test')
|
150
210
|
```
|
151
211
|
|
152
212
|
### Confluent Schema Registry Client
|
data/avro_turf.gemspec
CHANGED
@@ -17,8 +17,8 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
18
|
spec.require_paths = ["lib"]
|
19
19
|
|
20
|
-
spec.add_dependency "avro", ">= 1.7.7", "< 1.
|
21
|
-
spec.add_dependency "excon", "~> 0.
|
20
|
+
spec.add_dependency "avro", ">= 1.7.7", "< 1.11"
|
21
|
+
spec.add_dependency "excon", "~> 0.71"
|
22
22
|
|
23
23
|
spec.add_development_dependency "bundler", "~> 2.0"
|
24
24
|
spec.add_development_dependency "rake", "~> 13.0"
|
@@ -33,6 +33,8 @@ class AvroTurf::CachedConfluentSchemaRegistry
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def subject_version(subject, version = 'latest')
|
36
|
+
return @upstream.subject_version(subject, version) if version == 'latest'
|
37
|
+
|
36
38
|
@cache.lookup_by_version(subject, version) ||
|
37
39
|
@cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
|
38
40
|
end
|
@@ -7,6 +7,8 @@ class AvroTurf::ConfluentSchemaRegistry
|
|
7
7
|
url,
|
8
8
|
logger: Logger.new($stdout),
|
9
9
|
proxy: nil,
|
10
|
+
user: nil,
|
11
|
+
password: nil,
|
10
12
|
client_cert: nil,
|
11
13
|
client_key: nil,
|
12
14
|
client_key_pass: nil,
|
@@ -17,10 +19,12 @@ class AvroTurf::ConfluentSchemaRegistry
|
|
17
19
|
headers = {
|
18
20
|
"Content-Type" => CONTENT_TYPE
|
19
21
|
}
|
20
|
-
headers[:proxy] = proxy
|
22
|
+
headers[:proxy] = proxy unless proxy.nil?
|
21
23
|
@connection = Excon.new(
|
22
24
|
url,
|
23
25
|
headers: headers,
|
26
|
+
user: user,
|
27
|
+
password: password,
|
24
28
|
client_cert: client_cert,
|
25
29
|
client_key: client_key,
|
26
30
|
client_key_pass: client_key_pass,
|
data/lib/avro_turf/disk_cache.rb
CHANGED
@@ -2,15 +2,19 @@
|
|
2
2
|
# Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
|
3
3
|
class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
4
4
|
|
5
|
-
def initialize(disk_path)
|
5
|
+
def initialize(disk_path, logger: Logger.new($stdout))
|
6
6
|
super()
|
7
7
|
|
8
|
+
@logger = logger
|
9
|
+
|
8
10
|
# load the write-thru cache on startup, if it exists
|
9
11
|
@schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
|
10
|
-
|
12
|
+
hash = read_from_disk_cache(@schemas_by_id_path)
|
13
|
+
@schemas_by_id = hash if hash
|
11
14
|
|
12
15
|
@ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
|
13
|
-
|
16
|
+
hash = read_from_disk_cache(@ids_by_schema_path)
|
17
|
+
@ids_by_schema = hash if hash
|
14
18
|
|
15
19
|
@schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
|
16
20
|
@schemas_by_subject_version = {}
|
@@ -31,12 +35,18 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
31
35
|
return value
|
32
36
|
end
|
33
37
|
|
34
|
-
# override to
|
38
|
+
# override to use a json serializable cache key
|
39
|
+
def lookup_by_schema(subject, schema)
|
40
|
+
key = "#{subject}#{schema}"
|
41
|
+
@ids_by_schema[key]
|
42
|
+
end
|
43
|
+
|
44
|
+
# override to use a json serializable cache key and update the file cache
|
35
45
|
def store_by_schema(subject, schema, id)
|
36
|
-
|
37
|
-
|
46
|
+
key = "#{subject}#{schema}"
|
47
|
+
@ids_by_schema[key] = id
|
38
48
|
File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
|
39
|
-
|
49
|
+
id
|
40
50
|
end
|
41
51
|
|
42
52
|
# checks instance var (in-memory cache) for schema
|
@@ -49,7 +59,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
49
59
|
|
50
60
|
return schema unless schema.nil?
|
51
61
|
|
52
|
-
hash =
|
62
|
+
hash = read_from_disk_cache(@schemas_by_subject_version_path)
|
53
63
|
if hash
|
54
64
|
@schemas_by_subject_version = hash
|
55
65
|
@schemas_by_subject_version[key]
|
@@ -63,7 +73,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
63
73
|
# update instance var (in memory-cache) to match
|
64
74
|
def store_by_version(subject, version, schema)
|
65
75
|
key = "#{subject}#{version}"
|
66
|
-
hash =
|
76
|
+
hash = read_from_disk_cache(@schemas_by_subject_version_path)
|
67
77
|
hash = if hash
|
68
78
|
hash[key] = schema
|
69
79
|
hash
|
@@ -77,6 +87,19 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
77
87
|
@schemas_by_subject_version[key]
|
78
88
|
end
|
79
89
|
|
90
|
+
# Parse the file from disk, if it exists and is not zero length
|
91
|
+
private def read_from_disk_cache(path)
|
92
|
+
if File.exist?(path)
|
93
|
+
if File.size(path)!=0
|
94
|
+
return JSON.parse(File.read(path))
|
95
|
+
else
|
96
|
+
# just log a message if skipping zero length file
|
97
|
+
@logger.warn "skipping JSON.parse of zero length file at #{path}"
|
98
|
+
end
|
99
|
+
end
|
100
|
+
return nil
|
101
|
+
end
|
102
|
+
|
80
103
|
private def write_to_disk_cache(path, hash)
|
81
104
|
File.write(path, JSON.pretty_generate(hash))
|
82
105
|
end
|
@@ -17,12 +17,12 @@ class AvroTurf::InMemoryCache
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def lookup_by_schema(subject, schema)
|
20
|
-
key = subject
|
20
|
+
key = [subject, schema.to_s]
|
21
21
|
@ids_by_schema[key]
|
22
22
|
end
|
23
23
|
|
24
24
|
def store_by_schema(subject, schema, id)
|
25
|
-
key = subject
|
25
|
+
key = [subject, schema.to_s]
|
26
26
|
@ids_by_schema[key] = id
|
27
27
|
end
|
28
28
|
|
data/lib/avro_turf/messaging.rb
CHANGED
@@ -34,6 +34,8 @@ class AvroTurf
|
|
34
34
|
# namespace - The String default schema namespace.
|
35
35
|
# logger - The Logger that should be used to log information (optional).
|
36
36
|
# proxy - Forward the request via proxy (optional).
|
37
|
+
# user - User for basic auth (optional).
|
38
|
+
# password - Password for basic auth (optional).
|
37
39
|
# client_cert - Name of file containing client certificate (optional).
|
38
40
|
# client_key - Name of file containing client private key to go with client_cert (optional).
|
39
41
|
# client_key_pass - Password to go with client_key (optional).
|
@@ -47,6 +49,8 @@ class AvroTurf
|
|
47
49
|
namespace: nil,
|
48
50
|
logger: nil,
|
49
51
|
proxy: nil,
|
52
|
+
user: nil,
|
53
|
+
password: nil,
|
50
54
|
client_cert: nil,
|
51
55
|
client_key: nil,
|
52
56
|
client_key_pass: nil,
|
@@ -61,6 +65,8 @@ class AvroTurf
|
|
61
65
|
registry_url,
|
62
66
|
logger: @logger,
|
63
67
|
proxy: proxy,
|
68
|
+
user: user,
|
69
|
+
password: password,
|
64
70
|
client_cert: client_cert,
|
65
71
|
client_key: client_key,
|
66
72
|
client_key_pass: client_key_pass,
|
@@ -84,19 +90,26 @@ class AvroTurf
|
|
84
90
|
# the data. Must match the schema used when encoding (optional).
|
85
91
|
# schema_id - The integer id of the schema that should be used to encode
|
86
92
|
# the data.
|
93
|
+
# validate - The boolean for performing complete message validation before
|
94
|
+
# encoding it, Avro::SchemaValidator::ValidationError with
|
95
|
+
# a descriptive message will be raised in case of invalid message.
|
87
96
|
#
|
88
97
|
# Returns the encoded data as a String.
|
89
|
-
def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil)
|
90
|
-
|
98
|
+
def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil, validate: false)
|
99
|
+
schema, schema_id = if schema_id
|
91
100
|
fetch_schema_by_id(schema_id)
|
92
101
|
elsif subject && version
|
93
|
-
fetch_schema(subject, version)
|
102
|
+
fetch_schema(subject: subject, version: version)
|
94
103
|
elsif schema_name
|
95
|
-
register_schema(subject, schema_name, namespace)
|
104
|
+
register_schema(subject: subject, schema_name: schema_name, namespace: namespace)
|
96
105
|
else
|
97
106
|
raise ArgumentError.new('Neither schema_name nor schema_id nor subject + version provided to determine the schema.')
|
98
107
|
end
|
99
108
|
|
109
|
+
if validate
|
110
|
+
Avro::SchemaValidator.validate!(schema, message, recursive: true, encoded: false, fail_on_extra_fields: true)
|
111
|
+
end
|
112
|
+
|
100
113
|
stream = StringIO.new
|
101
114
|
writer = Avro::IO::DatumWriter.new(schema)
|
102
115
|
encoder = Avro::IO::BinaryEncoder.new(stream)
|
@@ -111,7 +124,7 @@ class AvroTurf
|
|
111
124
|
writer.write(message, encoder)
|
112
125
|
|
113
126
|
stream.string
|
114
|
-
rescue Excon::
|
127
|
+
rescue Excon::Errors::NotFound
|
115
128
|
if schema_id
|
116
129
|
raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
|
117
130
|
else
|
@@ -169,31 +182,31 @@ class AvroTurf
|
|
169
182
|
raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
|
170
183
|
end
|
171
184
|
|
172
|
-
private
|
173
|
-
|
174
185
|
# Providing subject and version to determine the schema,
|
175
186
|
# which skips the auto registeration of schema on the schema registry.
|
176
187
|
# Fetch the schema from registry with the provided subject name and version.
|
177
|
-
def fetch_schema(subject
|
188
|
+
def fetch_schema(subject:, version: 'latest')
|
178
189
|
schema_data = @registry.subject_version(subject, version)
|
179
190
|
schema_id = schema_data.fetch('id')
|
180
191
|
schema = Avro::Schema.parse(schema_data.fetch('schema'))
|
181
|
-
[
|
192
|
+
[schema, schema_id]
|
182
193
|
end
|
183
194
|
|
184
195
|
# Fetch the schema from registry with the provided schema_id.
|
185
196
|
def fetch_schema_by_id(schema_id)
|
186
|
-
|
187
|
-
|
188
|
-
|
197
|
+
schema = @schemas_by_id.fetch(schema_id) do
|
198
|
+
schema_json = @registry.fetch(schema_id)
|
199
|
+
Avro::Schema.parse(schema_json)
|
200
|
+
end
|
201
|
+
[schema, schema_id]
|
189
202
|
end
|
190
203
|
|
191
204
|
# Schemas are registered under the full name of the top level Avro record
|
192
205
|
# type, or `subject` if it's provided.
|
193
|
-
def register_schema(subject
|
206
|
+
def register_schema(schema_name:, subject: nil, namespace: nil)
|
194
207
|
schema = @schema_store.find(schema_name, namespace)
|
195
208
|
schema_id = @registry.register(subject || schema.fullname, schema)
|
196
|
-
[
|
209
|
+
[schema, schema_id]
|
197
210
|
end
|
198
211
|
end
|
199
212
|
end
|
@@ -22,7 +22,7 @@ class AvroTurf::SchemaStore
|
|
22
22
|
# Still need to check whether the schema is already loaded
|
23
23
|
return @schemas[fullname] if @schemas.key?(fullname)
|
24
24
|
|
25
|
-
load_schema!(fullname
|
25
|
+
load_schema!(fullname)
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
@@ -42,34 +42,57 @@ class AvroTurf::SchemaStore
|
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
|
-
|
45
|
+
protected
|
46
46
|
|
47
47
|
# Loads single schema
|
48
48
|
# This method is not thread-safe; do not call it outside of a mutex-synchronized routine
|
49
|
-
def load_schema!(fullname,
|
50
|
-
|
51
|
-
schema_path = File.join(@path, *namespace, schema_name + ".avsc")
|
49
|
+
def load_schema!(fullname, local_schemas_cache = {})
|
50
|
+
schema_path = build_schema_path(fullname)
|
52
51
|
schema_json = JSON.parse(File.read(schema_path))
|
53
|
-
schema = Avro::Schema.real_parse(schema_json, @schemas)
|
54
52
|
|
53
|
+
schema = Avro::Schema.real_parse(schema_json, local_schemas_cache)
|
54
|
+
|
55
|
+
# Don't cache the parsed schema until after its fullname is validated
|
55
56
|
if schema.respond_to?(:fullname) && schema.fullname != fullname
|
56
57
|
raise AvroTurf::SchemaError, "expected schema `#{schema_path}' to define type `#{fullname}'"
|
57
58
|
end
|
58
59
|
|
60
|
+
# Cache only this new top-level schema by its fullname. It's critical
|
61
|
+
# not to make every sub-schema resolvable at the top level here because
|
62
|
+
# multiple different avsc files may define the same sub-schema, and
|
63
|
+
# if we share the @schemas cache across all parsing contexts, the Avro
|
64
|
+
# gem will raise an Avro::SchemaParseError when parsing another avsc
|
65
|
+
# file that contains a subschema with the same fullname as one
|
66
|
+
# encountered previously in a different file:
|
67
|
+
# <Avro::SchemaParseError: The name "foo.bar" is already in use.>
|
68
|
+
# Essentially, the only schemas that should be resolvable in @schemas
|
69
|
+
# are those that have their own .avsc files on disk.
|
70
|
+
@schemas[fullname] = schema
|
71
|
+
|
59
72
|
schema
|
60
73
|
rescue ::Avro::SchemaParseError => e
|
61
74
|
# This is a hack in order to figure out exactly which type was missing. The
|
62
75
|
# Avro gem ought to provide this data directly.
|
63
76
|
if e.to_s =~ /"([\w\.]+)" is not a schema we know about/
|
64
|
-
|
77
|
+
# Try to first resolve a referenced schema from disk.
|
78
|
+
# If this is successful, the Avro gem will have mutated the
|
79
|
+
# local_schemas_cache, adding all the new schemas it found.
|
80
|
+
load_schema!($1, local_schemas_cache)
|
65
81
|
|
66
|
-
#
|
67
|
-
|
68
|
-
|
82
|
+
# Attempt to re-parse the original schema now that the dependency
|
83
|
+
# has been resolved and use the now-updated local_schemas_cache to
|
84
|
+
# pick up where we left off.
|
85
|
+
local_schemas_cache.delete(fullname)
|
86
|
+
load_schema!(fullname, local_schemas_cache)
|
69
87
|
else
|
70
88
|
raise
|
71
89
|
end
|
72
90
|
rescue Errno::ENOENT, Errno::ENAMETOOLONG
|
73
91
|
raise AvroTurf::SchemaNotFoundError, "could not find Avro schema at `#{schema_path}'"
|
74
92
|
end
|
93
|
+
|
94
|
+
def build_schema_path(fullname)
|
95
|
+
*namespace, schema_name = fullname.split(".")
|
96
|
+
schema_path = File.join(@path, *namespace, schema_name + ".avsc")
|
97
|
+
end
|
75
98
|
end
|
data/lib/avro_turf/version.rb
CHANGED
@@ -3,6 +3,8 @@ require 'avro_turf/confluent_schema_registry'
|
|
3
3
|
require 'avro_turf/test/fake_confluent_schema_registry_server'
|
4
4
|
|
5
5
|
describe AvroTurf::ConfluentSchemaRegistry do
|
6
|
+
let(:user) { "abc" }
|
7
|
+
let(:password) { "xxyyzz" }
|
6
8
|
let(:client_cert) { "test client cert" }
|
7
9
|
let(:client_key) { "test client key" }
|
8
10
|
let(:client_key_pass) { "test client key password" }
|
@@ -18,4 +20,14 @@ describe AvroTurf::ConfluentSchemaRegistry do
|
|
18
20
|
)
|
19
21
|
}
|
20
22
|
end
|
23
|
+
|
24
|
+
it_behaves_like "a confluent schema registry client" do
|
25
|
+
let(:registry) {
|
26
|
+
described_class.new(
|
27
|
+
registry_url,
|
28
|
+
user: user,
|
29
|
+
password: password,
|
30
|
+
)
|
31
|
+
}
|
32
|
+
end
|
21
33
|
end
|
@@ -4,7 +4,8 @@ require 'avro_turf/test/fake_confluent_schema_registry_server'
|
|
4
4
|
|
5
5
|
describe AvroTurf::CachedConfluentSchemaRegistry do
|
6
6
|
let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
|
7
|
-
let(:
|
7
|
+
let(:logger_io) { StringIO.new }
|
8
|
+
let(:cache) { AvroTurf::DiskCache.new("spec/cache", logger: Logger.new(logger_io))}
|
8
9
|
let(:registry) { described_class.new(upstream, cache: cache) }
|
9
10
|
let(:id) { rand(999) }
|
10
11
|
let(:schema) do
|
@@ -80,6 +81,40 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
|
|
80
81
|
end
|
81
82
|
end
|
82
83
|
|
84
|
+
describe "#fetch (zero length cache file)" do
|
85
|
+
let(:cache_after) do
|
86
|
+
{
|
87
|
+
"#{id}" => "#{schema}"
|
88
|
+
}
|
89
|
+
end
|
90
|
+
|
91
|
+
before do
|
92
|
+
# setup the disk cache with a zero length file
|
93
|
+
File.write(File.join("spec/cache", "schemas_by_id.json"), '')
|
94
|
+
end
|
95
|
+
|
96
|
+
it "skips zero length disk cache" do
|
97
|
+
# multiple calls return same result, with only one upstream call
|
98
|
+
allow(upstream).to receive(:fetch).with(id).and_return(schema)
|
99
|
+
expect(registry.fetch(id)).to eq(schema)
|
100
|
+
expect(registry.fetch(id)).to eq(schema)
|
101
|
+
expect(upstream).to have_received(:fetch).exactly(1).times
|
102
|
+
expect(load_cache("schemas_by_id.json")).to eq cache_after
|
103
|
+
expect(logger_io.string).to include("zero length file at spec/cache/schemas_by_id.json")
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
describe "#fetch (corrupt cache file)" do
|
108
|
+
before do
|
109
|
+
# setup the disk cache with a corrupt file (i.e. not json)
|
110
|
+
File.write(File.join("spec/cache", "schemas_by_id.json"), 'NOTJSON')
|
111
|
+
end
|
112
|
+
|
113
|
+
it "raises error on corrupt cache file" do
|
114
|
+
expect{registry.fetch(id)}.to raise_error(JSON::ParserError, /unexpected token/)
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
83
118
|
describe "#register" do
|
84
119
|
let(:subject_name) { "a_subject" }
|
85
120
|
let(:cache_before) do
|
@@ -120,6 +155,41 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
|
|
120
155
|
end
|
121
156
|
end
|
122
157
|
|
158
|
+
describe "#register (zero length cache file)" do
|
159
|
+
let(:subject_name) { "a_subject" }
|
160
|
+
let(:cache_after) do
|
161
|
+
{
|
162
|
+
"#{subject_name}#{schema}" => id
|
163
|
+
}
|
164
|
+
end
|
165
|
+
|
166
|
+
before do
|
167
|
+
# setup the disk cache with a zero length file
|
168
|
+
File.write(File.join("spec/cache", "ids_by_schema.json"), '')
|
169
|
+
end
|
170
|
+
|
171
|
+
it "skips zero length disk cache" do
|
172
|
+
# multiple calls return same result, with only one upstream call
|
173
|
+
allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
|
174
|
+
expect(registry.register(subject_name, schema)).to eq(id)
|
175
|
+
expect(registry.register(subject_name, schema)).to eq(id)
|
176
|
+
expect(upstream).to have_received(:register).exactly(1).times
|
177
|
+
expect(load_cache("ids_by_schema.json")).to eq cache_after
|
178
|
+
expect(logger_io.string).to include("zero length file at spec/cache/ids_by_schema.json")
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
describe "#register (corrupt cache file)" do
|
183
|
+
before do
|
184
|
+
# setup the disk cache with a corrupt file (i.e. not json)
|
185
|
+
File.write(File.join("spec/cache", "ids_by_schema.json"), 'NOTJSON')
|
186
|
+
end
|
187
|
+
|
188
|
+
it "raises error on corrupt cache file" do
|
189
|
+
expect{registry.register(subject_name, schema)}.to raise_error(JSON::ParserError, /unexpected token/)
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
123
193
|
describe "#subject_version" do
|
124
194
|
it "writes thru to disk cache" do
|
125
195
|
# multiple calls return same result, with zero upstream calls
|
data/spec/messaging_spec.rb
CHANGED
@@ -297,4 +297,106 @@ describe AvroTurf::Messaging do
|
|
297
297
|
end
|
298
298
|
end
|
299
299
|
end
|
300
|
+
|
301
|
+
context "validating" do
|
302
|
+
subject(:encode){ avro.encode(message, schema_name: "person", validate: true) }
|
303
|
+
|
304
|
+
context "for correct message" do
|
305
|
+
it { expect { encode }.not_to raise_error }
|
306
|
+
end
|
307
|
+
|
308
|
+
context "when message has wrong type" do
|
309
|
+
let(:message) { { "full_name" => 123 } }
|
310
|
+
|
311
|
+
it { expect { encode }.to raise_error(Avro::SchemaValidator::ValidationError, /\.full_name expected type string, got int/) }
|
312
|
+
end
|
313
|
+
|
314
|
+
context "when message contains extra fields (typo in key)" do
|
315
|
+
let(:message) { { "fulll_name" => "John Doe" } }
|
316
|
+
|
317
|
+
it { expect { encode }.to raise_error(Avro::SchemaValidator::ValidationError, /extra field 'fulll_name'/) }
|
318
|
+
end
|
319
|
+
end
|
320
|
+
|
321
|
+
context 'fetching and registering schema' do
|
322
|
+
let(:schema_store) { AvroTurf::SchemaStore.new(path: "spec/schemas") }
|
323
|
+
|
324
|
+
let(:registry) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
|
325
|
+
|
326
|
+
let(:avro) do
|
327
|
+
AvroTurf::Messaging.new(
|
328
|
+
registry: registry,
|
329
|
+
schema_store: schema_store,
|
330
|
+
logger: logger
|
331
|
+
)
|
332
|
+
end
|
333
|
+
|
334
|
+
let(:schema_id) { 234 }
|
335
|
+
|
336
|
+
context 'using fetch_schema' do
|
337
|
+
subject { avro.fetch_schema(subject: subj, version: version) }
|
338
|
+
|
339
|
+
let(:subj) { 'subject' }
|
340
|
+
|
341
|
+
let(:version) { 'version' }
|
342
|
+
|
343
|
+
let(:response) { {'id' => schema_id, 'schema' => schema_json} }
|
344
|
+
|
345
|
+
before do
|
346
|
+
allow(registry).to receive(:subject_version).with(subj, version).and_return(response)
|
347
|
+
end
|
348
|
+
|
349
|
+
it 'gets schema from registry' do
|
350
|
+
expect(subject).to eq([schema, schema_id])
|
351
|
+
end
|
352
|
+
end
|
353
|
+
|
354
|
+
context 'using fetch_schema_by_id' do
|
355
|
+
subject { avro.fetch_schema_by_id(schema_id) }
|
356
|
+
|
357
|
+
before do
|
358
|
+
allow(registry).to receive(:fetch).with(schema_id).and_return(schema_json)
|
359
|
+
end
|
360
|
+
|
361
|
+
it 'gets schema from registry' do
|
362
|
+
expect(subject).to eq([schema, schema_id])
|
363
|
+
end
|
364
|
+
end
|
365
|
+
|
366
|
+
context 'using register_schema' do
|
367
|
+
let(:schema_name) { 'schema_name' }
|
368
|
+
|
369
|
+
let(:namespace) { 'namespace' }
|
370
|
+
|
371
|
+
before do
|
372
|
+
allow(schema_store).to receive(:find).with(schema_name, namespace).and_return(schema)
|
373
|
+
end
|
374
|
+
|
375
|
+
context 'when subject is not set' do
|
376
|
+
subject { avro.register_schema(schema_name: schema_name, namespace: namespace) }
|
377
|
+
|
378
|
+
before do
|
379
|
+
allow(registry).to receive(:register).with(schema.fullname, schema).and_return(schema_id)
|
380
|
+
end
|
381
|
+
|
382
|
+
it 'registers schema in registry' do
|
383
|
+
expect(subject).to eq([schema, schema_id])
|
384
|
+
end
|
385
|
+
end
|
386
|
+
|
387
|
+
context 'when subject is set' do
|
388
|
+
subject { avro.register_schema(schema_name: schema_name, namespace: namespace, subject: subj) }
|
389
|
+
|
390
|
+
let(:subj) { 'subject' }
|
391
|
+
|
392
|
+
before do
|
393
|
+
allow(registry).to receive(:register).with(subj, schema).and_return(schema_id)
|
394
|
+
end
|
395
|
+
|
396
|
+
it 'registers schema in registry' do
|
397
|
+
expect(subject).to eq([schema, schema_id])
|
398
|
+
end
|
399
|
+
end
|
400
|
+
end
|
401
|
+
end
|
300
402
|
end
|
data/spec/schema_store_spec.rb
CHANGED
@@ -198,6 +198,104 @@ describe AvroTurf::SchemaStore do
|
|
198
198
|
expect(schema.fullname).to eq "person"
|
199
199
|
end
|
200
200
|
|
201
|
+
# This test would fail under avro_turf <= v0.11.0
|
202
|
+
it "does NOT cache *nested* schemas in memory" do
|
203
|
+
FileUtils.mkdir_p("spec/schemas/test")
|
204
|
+
|
205
|
+
define_schema "test/person.avsc", <<-AVSC
|
206
|
+
{
|
207
|
+
"name": "person",
|
208
|
+
"namespace": "test",
|
209
|
+
"type": "record",
|
210
|
+
"fields": [
|
211
|
+
{
|
212
|
+
"name": "address",
|
213
|
+
"type": {
|
214
|
+
"name": "address",
|
215
|
+
"type": "record",
|
216
|
+
"fields": [
|
217
|
+
{ "name": "addr1", "type": "string" },
|
218
|
+
{ "name": "addr2", "type": "string" },
|
219
|
+
{ "name": "city", "type": "string" },
|
220
|
+
{ "name": "zip", "type": "string" }
|
221
|
+
]
|
222
|
+
}
|
223
|
+
}
|
224
|
+
]
|
225
|
+
}
|
226
|
+
AVSC
|
227
|
+
|
228
|
+
schema = store.find('person', 'test')
|
229
|
+
expect(schema.fullname).to eq "test.person"
|
230
|
+
|
231
|
+
expect { store.find('address', 'test') }.
|
232
|
+
to raise_error(AvroTurf::SchemaNotFoundError)
|
233
|
+
end
|
234
|
+
|
235
|
+
# This test would fail under avro_turf <= v0.11.0
|
236
|
+
it "allows two different avsc files to define nested sub-schemas with the same fullname" do
|
237
|
+
FileUtils.mkdir_p("spec/schemas/test")
|
238
|
+
|
239
|
+
define_schema "test/person.avsc", <<-AVSC
|
240
|
+
{
|
241
|
+
"name": "person",
|
242
|
+
"namespace": "test",
|
243
|
+
"type": "record",
|
244
|
+
"fields": [
|
245
|
+
{
|
246
|
+
"name": "location",
|
247
|
+
"type": {
|
248
|
+
"name": "location",
|
249
|
+
"type": "record",
|
250
|
+
"fields": [
|
251
|
+
{ "name": "city", "type": "string" },
|
252
|
+
{ "name": "zipcode", "type": "string" }
|
253
|
+
]
|
254
|
+
}
|
255
|
+
}
|
256
|
+
]
|
257
|
+
}
|
258
|
+
AVSC
|
259
|
+
|
260
|
+
define_schema "test/company.avsc", <<-AVSC
|
261
|
+
{
|
262
|
+
"name": "company",
|
263
|
+
"namespace": "test",
|
264
|
+
"type": "record",
|
265
|
+
"fields": [
|
266
|
+
{
|
267
|
+
"name": "headquarters",
|
268
|
+
"type": {
|
269
|
+
"name": "location",
|
270
|
+
"type": "record",
|
271
|
+
"fields": [
|
272
|
+
{ "name": "city", "type": "string" },
|
273
|
+
{ "name": "postcode", "type": "string" }
|
274
|
+
]
|
275
|
+
}
|
276
|
+
}
|
277
|
+
]
|
278
|
+
}
|
279
|
+
AVSC
|
280
|
+
|
281
|
+
company = nil
|
282
|
+
person = store.find('person', 'test')
|
283
|
+
|
284
|
+
# This should *NOT* raise the error:
|
285
|
+
# #<Avro::SchemaParseError: The name "test.location" is already in use.>
|
286
|
+
expect { company = store.find('company', 'test') }.not_to raise_error
|
287
|
+
|
288
|
+
person_location_field = person.fields_hash['location']
|
289
|
+
expect(person_location_field.type.name).to eq('location')
|
290
|
+
expect(person_location_field.type.fields_hash).to include('zipcode')
|
291
|
+
expect(person_location_field.type.fields_hash).not_to include('postcode')
|
292
|
+
|
293
|
+
company_headquarters_field = company.fields_hash['headquarters']
|
294
|
+
expect(company_headquarters_field.type.name).to eq('location')
|
295
|
+
expect(company_headquarters_field.type.fields_hash).to include('postcode')
|
296
|
+
expect(company_headquarters_field.type.fields_hash).not_to include('zipcode')
|
297
|
+
end
|
298
|
+
|
201
299
|
it "is thread safe" do
|
202
300
|
define_schema "address.avsc", <<-AVSC
|
203
301
|
{
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: avro_turf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.0
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Schierbeck
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: avro
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: 1.7.7
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '1.
|
22
|
+
version: '1.11'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,21 +29,21 @@ dependencies:
|
|
29
29
|
version: 1.7.7
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '1.
|
32
|
+
version: '1.11'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: excon
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
36
36
|
requirements:
|
37
37
|
- - "~>"
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version: '0.
|
39
|
+
version: '0.71'
|
40
40
|
type: :runtime
|
41
41
|
prerelease: false
|
42
42
|
version_requirements: !ruby/object:Gem::Requirement
|
43
43
|
requirements:
|
44
44
|
- - "~>"
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: '0.
|
46
|
+
version: '0.71'
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: bundler
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -163,8 +163,8 @@ executables: []
|
|
163
163
|
extensions: []
|
164
164
|
extra_rdoc_files: []
|
165
165
|
files:
|
166
|
-
- ".circleci/config.yml"
|
167
166
|
- ".github/workflows/ruby.yml"
|
167
|
+
- ".github/workflows/stale.yml"
|
168
168
|
- ".gitignore"
|
169
169
|
- ".rspec"
|
170
170
|
- CHANGELOG.md
|
@@ -249,8 +249,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
249
249
|
- !ruby/object:Gem::Version
|
250
250
|
version: '0'
|
251
251
|
requirements: []
|
252
|
-
|
253
|
-
rubygems_version: 2.7.6
|
252
|
+
rubygems_version: 3.1.2
|
254
253
|
signing_key:
|
255
254
|
specification_version: 4
|
256
255
|
summary: A library that makes it easier to use the Avro serialization format from
|
data/.circleci/config.yml
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
version: 2
|
2
|
-
jobs:
|
3
|
-
build:
|
4
|
-
environment:
|
5
|
-
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
|
6
|
-
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
|
7
|
-
docker:
|
8
|
-
- image: circleci/ruby:2.6.2
|
9
|
-
steps:
|
10
|
-
- checkout
|
11
|
-
- run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
|
12
|
-
- restore_cache:
|
13
|
-
keys:
|
14
|
-
# This branch if available
|
15
|
-
- v1-dep-{{ .Branch }}-
|
16
|
-
# Default branch if not
|
17
|
-
- v1-dep-master-
|
18
|
-
# Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
|
19
|
-
- v1-dep-
|
20
|
-
- run: gem install bundler --no-document
|
21
|
-
- run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
|
22
|
-
# Save dependency cache
|
23
|
-
- save_cache:
|
24
|
-
key: v1-dep-{{ .Branch }}-{{ epoch }}
|
25
|
-
paths:
|
26
|
-
- vendor/bundle
|
27
|
-
- ~/.bundle
|
28
|
-
- run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
|
29
|
-
- run:
|
30
|
-
command: bundle exec rspec --color --require spec_helper --format progress
|
31
|
-
- store_test_results:
|
32
|
-
path: /tmp/circleci-test-results
|
33
|
-
- store_artifacts:
|
34
|
-
path: /tmp/circleci-artifacts
|
35
|
-
- store_artifacts:
|
36
|
-
path: /tmp/circleci-test-results
|