avro_turf 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +5 -6
- data/CHANGELOG.md +5 -0
- data/README.md +6 -2
- data/lib/avro_turf.rb +13 -3
- data/lib/avro_turf/confluent_schema_registry.rb +4 -7
- data/lib/avro_turf/schema_store.rb +5 -0
- data/lib/avro_turf/version.rb +1 -1
- data/spec/avro_turf_spec.rb +157 -28
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ebbfdd602e4d6c23894efe960ef21e01bc6bbc770c6076105260fc36de3c8e1e
|
4
|
+
data.tar.gz: 1113f99b28e58f641a13c9cec7b8b314bb113b34e07e3c27aab5559c524568b4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 56c4235851a345cf81abfd7170961f973eed218437187a20ef1b717e600ad19f6cbea8df41a755af5d54f2c98adac0ea3d5c236b822d4fdda10e17c1f6987d5c
|
7
|
+
data.tar.gz: 8caefcd9b974120c84f0a7910f26aff6106578eb80623119ac48c0c25c1775092c6e18b55a47cb97a2d000e7be7e110905cf0fb2d30a5f80e6bb0a800f98bc6c
|
data/.github/workflows/ruby.yml
CHANGED
@@ -8,15 +8,14 @@ jobs:
|
|
8
8
|
runs-on: ubuntu-latest
|
9
9
|
strategy:
|
10
10
|
matrix:
|
11
|
-
ruby
|
12
|
-
2.3.x, 2.4.x, 2.5.x, 2.6.x, 2.7.x, 3.0.x]
|
11
|
+
ruby: [2.3, 2.4, 2.5, 2.6, 2.7, 3.0]
|
13
12
|
|
14
13
|
steps:
|
15
|
-
- uses: actions/checkout@
|
16
|
-
- name: Set up Ruby
|
17
|
-
uses:
|
14
|
+
- uses: actions/checkout@v2
|
15
|
+
- name: Set up Ruby ${{ matrix.ruby }}
|
16
|
+
uses: ruby/setup-ruby@v1
|
18
17
|
with:
|
19
|
-
ruby-version:
|
18
|
+
ruby-version: ${{ matrix.ruby }}
|
20
19
|
- name: Build and test with RSpec
|
21
20
|
run: |
|
22
21
|
gem install bundler
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -18,7 +18,7 @@ The aliases for the original names will be removed in a future release.
|
|
18
18
|
|
19
19
|
## Note about finding nested schemas
|
20
20
|
|
21
|
-
As of AvroTurf version 0.
|
21
|
+
As of AvroTurf version 1.0.0, only top-level schemas that have their own .avsc file will be loaded and resolvable by the `AvroTurf::SchemaStore#find` method. This change will likely not affect most users. However, if you use `AvroTurf::SchemaStore#load_schemas!` to pre-cache all your schemas and then rely on `AvroTurf::SchemaStore#find` to access nested schemas that are not defined by their own .avsc files, your code may stop working when you upgrade to v1.0.0.
|
22
22
|
|
23
23
|
As an example, if you have a `person` schema (defined in `my/schemas/contacts/person.avsc`) that defines a nested `address` schema like this:
|
24
24
|
|
@@ -44,7 +44,7 @@ As an example, if you have a `person` schema (defined in `my/schemas/contacts/pe
|
|
44
44
|
]
|
45
45
|
}
|
46
46
|
```
|
47
|
-
...this will no longer work in
|
47
|
+
...this will no longer work in v1.0.0:
|
48
48
|
```ruby
|
49
49
|
store = AvroTurf::SchemaStore.new(path: 'my/schemas')
|
50
50
|
store.load_schemas!
|
@@ -88,6 +88,10 @@ avro.decode(encoded_data, schema_name: "person")
|
|
88
88
|
|
89
89
|
# Encode some data using the named schema.
|
90
90
|
avro.encode({ "name" => "Jane", "age" => 28 }, schema_name: "person")
|
91
|
+
|
92
|
+
# Data can be validated before encoding to get a description of problem through
|
93
|
+
# Avro::SchemaValidator::ValidationError exception
|
94
|
+
avro.encode({ "titl" => "hello, world" }, schema_name: "person", validate: true)
|
91
95
|
```
|
92
96
|
|
93
97
|
### Inter-schema references
|
data/lib/avro_turf.rb
CHANGED
@@ -40,12 +40,15 @@ class AvroTurf
|
|
40
40
|
#
|
41
41
|
# data - The data that should be encoded.
|
42
42
|
# schema_name - The name of a schema in the `schemas_path`.
|
43
|
+
# validate - The boolean for performing complete data validation before
|
44
|
+
# encoding it, Avro::SchemaValidator::ValidationError with
|
45
|
+
# a descriptive message will be raised in case of invalid message.
|
43
46
|
#
|
44
47
|
# Returns a String containing the encoded data.
|
45
|
-
def encode(data, schema_name: nil, namespace: @namespace)
|
48
|
+
def encode(data, schema_name: nil, namespace: @namespace, validate: false)
|
46
49
|
stream = StringIO.new
|
47
50
|
|
48
|
-
encode_to_stream(data, stream: stream, schema_name: schema_name, namespace: namespace)
|
51
|
+
encode_to_stream(data, stream: stream, schema_name: schema_name, namespace: namespace, validate: validate)
|
49
52
|
|
50
53
|
stream.string
|
51
54
|
end
|
@@ -56,12 +59,19 @@ class AvroTurf
|
|
56
59
|
# data - The data that should be encoded.
|
57
60
|
# schema_name - The name of a schema in the `schemas_path`.
|
58
61
|
# stream - An IO object that the encoded data should be written to (optional).
|
62
|
+
# validate - The boolean for performing complete data validation before
|
63
|
+
# encoding it, Avro::SchemaValidator::ValidationError with
|
64
|
+
# a descriptive message will be raised in case of invalid message.
|
59
65
|
#
|
60
66
|
# Returns nothing.
|
61
|
-
def encode_to_stream(data, schema_name: nil, stream: nil, namespace: @namespace)
|
67
|
+
def encode_to_stream(data, schema_name: nil, stream: nil, namespace: @namespace, validate: false)
|
62
68
|
schema = @schema_store.find(schema_name, namespace)
|
63
69
|
writer = Avro::IO::DatumWriter.new(schema)
|
64
70
|
|
71
|
+
if validate
|
72
|
+
Avro::SchemaValidator.validate!(schema, data, recursive: true, encoded: false, fail_on_extra_fields: true)
|
73
|
+
end
|
74
|
+
|
65
75
|
dw = Avro::DataFile::Writer.new(stream, writer, schema, @codec)
|
66
76
|
dw << data.as_avro
|
67
77
|
dw.close
|
@@ -40,9 +40,7 @@ class AvroTurf::ConfluentSchemaRegistry
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def register(subject, schema)
|
43
|
-
data = post("/subjects/#{subject}/versions", body: {
|
44
|
-
schema: schema.to_s
|
45
|
-
}.to_json)
|
43
|
+
data = post("/subjects/#{subject}/versions", body: { schema: schema.to_s }.to_json)
|
46
44
|
|
47
45
|
id = data.fetch("id")
|
48
46
|
|
@@ -82,8 +80,7 @@ class AvroTurf::ConfluentSchemaRegistry
|
|
82
80
|
# http://docs.confluent.io/3.1.2/schema-registry/docs/api.html#compatibility
|
83
81
|
def compatible?(subject, schema, version = 'latest')
|
84
82
|
data = post("/compatibility/subjects/#{subject}/versions/#{version}",
|
85
|
-
expects: [200, 404],
|
86
|
-
body: { schema: schema.to_s }.to_json)
|
83
|
+
expects: [200, 404], body: { schema: schema.to_s }.to_json)
|
87
84
|
data.fetch('is_compatible', false) unless data.has_key?('error_code')
|
88
85
|
end
|
89
86
|
|
@@ -94,7 +91,7 @@ class AvroTurf::ConfluentSchemaRegistry
|
|
94
91
|
|
95
92
|
# Update global config
|
96
93
|
def update_global_config(config)
|
97
|
-
put("/config",
|
94
|
+
put("/config", body: config.to_json)
|
98
95
|
end
|
99
96
|
|
100
97
|
# Get config for subject
|
@@ -104,7 +101,7 @@ class AvroTurf::ConfluentSchemaRegistry
|
|
104
101
|
|
105
102
|
# Update config for subject
|
106
103
|
def update_subject_config(subject, config)
|
107
|
-
put("/config/#{subject}",
|
104
|
+
put("/config/#{subject}", body: config.to_json)
|
108
105
|
end
|
109
106
|
|
110
107
|
private
|
@@ -83,6 +83,11 @@ class AvroTurf::SchemaStore
|
|
83
83
|
# has been resolved and use the now-updated local_schemas_cache to
|
84
84
|
# pick up where we left off.
|
85
85
|
local_schemas_cache.delete(fullname)
|
86
|
+
# Ensure enum schemas are cleaned up to avoid conflicts when re-parsing
|
87
|
+
# schema.
|
88
|
+
local_schemas_cache.each do |schema_name, schema|
|
89
|
+
local_schemas_cache.delete(schema_name) if schema.type_sym == :enum
|
90
|
+
end
|
86
91
|
load_schema!(fullname, local_schemas_cache)
|
87
92
|
else
|
88
93
|
raise
|
data/lib/avro_turf/version.rb
CHANGED
data/spec/avro_turf_spec.rb
CHANGED
@@ -6,44 +6,112 @@ describe AvroTurf do
|
|
6
6
|
end
|
7
7
|
|
8
8
|
describe "#encode" do
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
9
|
+
context "when using plain schema" do
|
10
|
+
before do
|
11
|
+
define_schema "person.avsc", <<-AVSC
|
12
|
+
{
|
13
|
+
"name": "person",
|
14
|
+
"type": "record",
|
15
|
+
"fields": [
|
16
|
+
{
|
17
|
+
"type": "string",
|
18
|
+
"name": "full_name"
|
19
|
+
}
|
20
|
+
]
|
21
|
+
}
|
22
|
+
AVSC
|
23
|
+
end
|
24
|
+
|
25
|
+
it "encodes data with Avro" do
|
26
|
+
data = {
|
27
|
+
"full_name" => "John Doe"
|
20
28
|
}
|
21
|
-
AVSC
|
22
|
-
end
|
23
29
|
|
24
|
-
|
25
|
-
data = {
|
26
|
-
"full_name" => "John Doe"
|
27
|
-
}
|
30
|
+
encoded_data = avro.encode(data, schema_name: "person")
|
28
31
|
|
29
|
-
|
32
|
+
expect(avro.decode(encoded_data)).to eq(data)
|
33
|
+
end
|
30
34
|
|
31
|
-
|
35
|
+
it "allows specifying a codec that should be used to compress messages" do
|
36
|
+
compressed_avro = AvroTurf.new(schemas_path: "spec/schemas/", codec: "deflate")
|
37
|
+
|
38
|
+
data = {
|
39
|
+
"full_name" => "John Doe" * 100
|
40
|
+
}
|
41
|
+
|
42
|
+
uncompressed_data = avro.encode(data, schema_name: "person")
|
43
|
+
compressed_data = compressed_avro.encode(data, schema_name: "person")
|
44
|
+
|
45
|
+
expect(compressed_data.bytesize).to be < uncompressed_data.bytesize
|
46
|
+
expect(compressed_avro.decode(compressed_data)).to eq(data)
|
47
|
+
end
|
32
48
|
end
|
33
49
|
|
34
|
-
|
35
|
-
|
50
|
+
context 'when using nested schemas' do
|
51
|
+
before do
|
52
|
+
define_schema "post.avsc", <<-AVSC
|
53
|
+
{
|
54
|
+
"name": "post",
|
55
|
+
"type": "record",
|
56
|
+
"fields": [
|
57
|
+
{
|
58
|
+
"name": "tag",
|
59
|
+
"type": {
|
60
|
+
"type": "enum",
|
61
|
+
"name": "tag",
|
62
|
+
"symbols": ["foo", "bar"]
|
63
|
+
}
|
64
|
+
},
|
65
|
+
{
|
66
|
+
"name": "messages",
|
67
|
+
"type": {
|
68
|
+
"type": "array",
|
69
|
+
"items": "message"
|
70
|
+
}
|
71
|
+
}
|
72
|
+
]
|
73
|
+
}
|
74
|
+
AVSC
|
75
|
+
|
76
|
+
define_schema "message.avsc", <<-AVSC
|
77
|
+
{
|
78
|
+
"name": "message",
|
79
|
+
"type": "record",
|
80
|
+
"fields": [
|
81
|
+
{
|
82
|
+
"type": "string",
|
83
|
+
"name": "content"
|
84
|
+
},
|
85
|
+
{
|
86
|
+
"name": "label",
|
87
|
+
"type": {
|
88
|
+
"type": "enum",
|
89
|
+
"name": "label",
|
90
|
+
"symbols": ["foo", "bar"]
|
91
|
+
}
|
92
|
+
}
|
93
|
+
]
|
94
|
+
}
|
95
|
+
AVSC
|
96
|
+
end
|
36
97
|
|
37
|
-
data
|
38
|
-
|
39
|
-
|
98
|
+
it "encodes data with Avro" do
|
99
|
+
data = {
|
100
|
+
"tag" => "foo",
|
101
|
+
"messages" => [
|
102
|
+
{
|
103
|
+
"content" => "hello",
|
104
|
+
"label" => "bar"
|
105
|
+
}
|
106
|
+
]
|
107
|
+
}
|
40
108
|
|
41
|
-
|
42
|
-
compressed_data = compressed_avro.encode(data, schema_name: "person")
|
109
|
+
encoded_data = avro.encode(data, schema_name: "post")
|
43
110
|
|
44
|
-
|
45
|
-
|
111
|
+
expect(avro.decode(encoded_data)).to eq(data)
|
112
|
+
end
|
46
113
|
end
|
114
|
+
|
47
115
|
end
|
48
116
|
|
49
117
|
describe "#decode" do
|
@@ -105,6 +173,67 @@ describe AvroTurf do
|
|
105
173
|
|
106
174
|
expect(avro.decode(stream.string)).to eq "hello"
|
107
175
|
end
|
176
|
+
|
177
|
+
context "validating" do
|
178
|
+
subject(:encode_to_stream) do
|
179
|
+
stream = StringIO.new
|
180
|
+
avro.encode_to_stream(message, stream: stream, schema_name: "message", validate: true)
|
181
|
+
end
|
182
|
+
|
183
|
+
context "with a valid message" do
|
184
|
+
let(:message) { { "full_name" => "John Doe" } }
|
185
|
+
|
186
|
+
it "does not raise any error" do
|
187
|
+
define_schema "message.avsc", <<-AVSC
|
188
|
+
{
|
189
|
+
"name": "message",
|
190
|
+
"type": "record",
|
191
|
+
"fields": [
|
192
|
+
{ "name": "full_name", "type": "string" }
|
193
|
+
]
|
194
|
+
}
|
195
|
+
AVSC
|
196
|
+
|
197
|
+
expect { encode_to_stream }.not_to raise_error
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
201
|
+
context "when message has wrong type" do
|
202
|
+
let(:message) { { "full_name" => 123 } }
|
203
|
+
|
204
|
+
it "raises Avro::SchemaValidator::ValidationError with a message about type mismatch" do
|
205
|
+
define_schema "message.avsc", <<-AVSC
|
206
|
+
{
|
207
|
+
"name": "message",
|
208
|
+
"type": "record",
|
209
|
+
"fields": [
|
210
|
+
{ "name": "full_name", "type": "string" }
|
211
|
+
]
|
212
|
+
}
|
213
|
+
AVSC
|
214
|
+
|
215
|
+
expect { encode_to_stream }.to raise_error(Avro::SchemaValidator::ValidationError, /\.full_name expected type string, got int/)
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
context "when message contains extra fields (typo in key)" do
|
220
|
+
let(:message) { { "fulll_name" => "John Doe" } }
|
221
|
+
|
222
|
+
it "raises Avro::SchemaValidator::ValidationError with a message about extra field" do
|
223
|
+
define_schema "message.avsc", <<-AVSC
|
224
|
+
{
|
225
|
+
"name": "message",
|
226
|
+
"type": "record",
|
227
|
+
"fields": [
|
228
|
+
{ "name": "full_name", "type": "string" }
|
229
|
+
]
|
230
|
+
}
|
231
|
+
AVSC
|
232
|
+
|
233
|
+
expect { encode_to_stream }.to raise_error(Avro::SchemaValidator::ValidationError, /extra field 'fulll_name'/)
|
234
|
+
end
|
235
|
+
end
|
236
|
+
end
|
108
237
|
end
|
109
238
|
|
110
239
|
describe "#decode_stream" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: avro_turf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Schierbeck
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: avro
|