avro_turf 1.6.0 → 1.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/lib/avro_turf/cached_confluent_schema_registry.rb +2 -2
- data/lib/avro_turf/core_ext/date.rb +1 -1
- data/lib/avro_turf/disk_cache.rb +25 -15
- data/lib/avro_turf/messaging.rb +16 -4
- data/lib/avro_turf/schema_store.rb +1 -1
- data/lib/avro_turf/version.rb +1 -1
- data/lib/avro_turf.rb +11 -3
- data/spec/avro_turf_spec.rb +64 -2
- data/spec/core_ext/date_spec.rb +2 -2
- data/spec/schema_store_spec.rb +27 -0
- data/spec/spec_helper.rb +10 -0
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67ff8ee9b578227cd54a33d76a95fed8bd2c9a396aa360ad9df9f52d01e5c25a
|
4
|
+
data.tar.gz: 8bca24236ce42f19f6db457ad13d76d33e89b18703dc0eb74a8adca53b2c7b6e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 859e8fa6938679007704d35b1b4a54616da7637c83f41c58cbd10828216e0af198846e755334f465613ceb9086ec0c63a823dc2cbf3ddbdc191a97423d2e3147
|
7
|
+
data.tar.gz: 7900a85abaf9dd55ac3b89bba2d04935ddbafc850a92fb1941ff8984df864cfaa4bb5527ca4d48a74ef3715915d598044b7c486363909f2705ece83b0ffca50f
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,16 @@
|
|
2
2
|
|
3
3
|
## Unreleased
|
4
4
|
|
5
|
+
## v1.8.0
|
6
|
+
|
7
|
+
- add support for `Date` via appropriate logicalType definition. This is a backwards-incompatible change (#177)
|
8
|
+
- Fixed schema file cache truncation on multiple running instances and parallel access to the cache files.
|
9
|
+
|
10
|
+
## v1.7.0
|
11
|
+
|
12
|
+
- Added extra params for the validation message schema before encode (#169)
|
13
|
+
- Fix infinite retry when loading schema with nested primary type in separate file (#165)
|
14
|
+
|
5
15
|
## v1.6.0
|
6
16
|
|
7
17
|
- Schema registry path prefix (#162)
|
@@ -55,7 +65,7 @@
|
|
55
65
|
## v0.9.0
|
56
66
|
|
57
67
|
- Compatibility with Avro v1.9.0 (#94)
|
58
|
-
- Disable the auto
|
68
|
+
- Disable the auto registration of schema (#95)
|
59
69
|
- abstracted caching from CachedConfluentSchemaRegistry (#74)
|
60
70
|
- Load avro-patches if installed to silence deprecation errors (#85)
|
61
71
|
- Make schema store to be thread safe (#92)
|
@@ -13,7 +13,7 @@ class AvroTurf::CachedConfluentSchemaRegistry
|
|
13
13
|
# cache - Optional user provided Cache object that responds to all methods in the AvroTurf::InMemoryCache interface.
|
14
14
|
def initialize(upstream, cache: nil)
|
15
15
|
@upstream = upstream
|
16
|
-
@cache = cache || AvroTurf::InMemoryCache.new
|
16
|
+
@cache = cache || AvroTurf::InMemoryCache.new
|
17
17
|
end
|
18
18
|
|
19
19
|
# Delegate the following methods to the upstream
|
@@ -34,7 +34,7 @@ class AvroTurf::CachedConfluentSchemaRegistry
|
|
34
34
|
|
35
35
|
def subject_version(subject, version = 'latest')
|
36
36
|
return @upstream.subject_version(subject, version) if version == 'latest'
|
37
|
-
|
37
|
+
|
38
38
|
@cache.lookup_by_version(subject, version) ||
|
39
39
|
@cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
|
40
40
|
end
|
data/lib/avro_turf/disk_cache.rb
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
# A cache for the CachedConfluentSchemaRegistry.
|
2
2
|
# Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
|
3
|
-
class AvroTurf::DiskCache
|
3
|
+
class AvroTurf::DiskCache
|
4
4
|
|
5
5
|
def initialize(disk_path, logger: Logger.new($stdout))
|
6
|
-
super()
|
7
|
-
|
8
6
|
@logger = logger
|
9
7
|
|
10
8
|
# load the write-thru cache on startup, if it exists
|
11
9
|
@schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
|
12
10
|
hash = read_from_disk_cache(@schemas_by_id_path)
|
13
|
-
@schemas_by_id = hash
|
11
|
+
@schemas_by_id = hash || {}
|
14
12
|
|
15
13
|
@ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
|
16
14
|
hash = read_from_disk_cache(@ids_by_schema_path)
|
17
|
-
@ids_by_schema = hash
|
15
|
+
@ids_by_schema = hash || {}
|
18
16
|
|
19
17
|
@schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
|
20
18
|
@schemas_by_subject_version = {}
|
@@ -24,15 +22,16 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
24
22
|
# the write-thru cache (json) does not store keys in numeric format
|
25
23
|
# so, convert id to a string for caching purposes
|
26
24
|
def lookup_by_id(id)
|
27
|
-
|
25
|
+
@schemas_by_id[id.to_s]
|
28
26
|
end
|
29
27
|
|
30
28
|
# override to include write-thru cache after storing result from upstream
|
31
29
|
def store_by_id(id, schema)
|
32
30
|
# must return the value from storing the result (i.e. do not return result from file write)
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
@schemas_by_id[id.to_s] = schema
|
32
|
+
write_to_disk_cache(@schemas_by_id_path, @schemas_by_id)
|
33
|
+
|
34
|
+
schema
|
36
35
|
end
|
37
36
|
|
38
37
|
# override to use a json serializable cache key
|
@@ -45,7 +44,8 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
45
44
|
def store_by_schema(subject, schema, id)
|
46
45
|
key = "#{subject}#{schema}"
|
47
46
|
@ids_by_schema[key] = id
|
48
|
-
|
47
|
+
|
48
|
+
write_to_disk_cache(@ids_by_schema_path, @ids_by_schema)
|
49
49
|
id
|
50
50
|
end
|
51
51
|
|
@@ -78,7 +78,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
78
78
|
hash[key] = schema
|
79
79
|
hash
|
80
80
|
else
|
81
|
-
{key => schema}
|
81
|
+
{ key => schema }
|
82
82
|
end
|
83
83
|
|
84
84
|
write_to_disk_cache(@schemas_by_subject_version_path, hash)
|
@@ -90,17 +90,27 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
|
|
90
90
|
# Parse the file from disk, if it exists and is not zero length
|
91
91
|
private def read_from_disk_cache(path)
|
92
92
|
if File.exist?(path)
|
93
|
-
if File.size(path)!=0
|
94
|
-
|
93
|
+
if File.size(path) != 0
|
94
|
+
json_data = File.open(path, 'r') do |file|
|
95
|
+
file.flock(File::LOCK_SH)
|
96
|
+
file.read
|
97
|
+
end
|
98
|
+
|
99
|
+
return JSON.parse(json_data)
|
95
100
|
else
|
96
101
|
# just log a message if skipping zero length file
|
97
102
|
@logger.warn "skipping JSON.parse of zero length file at #{path}"
|
98
103
|
end
|
99
104
|
end
|
100
|
-
|
105
|
+
|
106
|
+
nil
|
101
107
|
end
|
102
108
|
|
103
109
|
private def write_to_disk_cache(path, hash)
|
104
|
-
|
110
|
+
# don't use "w" because it truncates the file before lock
|
111
|
+
File.open(path, File::RDWR | File::CREAT, 0644) do |file|
|
112
|
+
file.flock(File::LOCK_EX)
|
113
|
+
file.write(JSON.pretty_generate(hash))
|
114
|
+
end
|
105
115
|
end
|
106
116
|
end
|
data/lib/avro_turf/messaging.rb
CHANGED
@@ -21,8 +21,20 @@ class AvroTurf
|
|
21
21
|
# 1: https://github.com/confluentinc/schema-registry
|
22
22
|
class Messaging
|
23
23
|
MAGIC_BYTE = [0].pack("C").freeze
|
24
|
-
|
25
|
-
|
24
|
+
|
25
|
+
class DecodedMessage
|
26
|
+
attr_reader :schema_id
|
27
|
+
attr_reader :writer_schema
|
28
|
+
attr_reader :reader_schema
|
29
|
+
attr_reader :message
|
30
|
+
|
31
|
+
def initialize(schema_id, writer_schema, reader_schema, message)
|
32
|
+
@schema_id = schema_id
|
33
|
+
@writer_schema = writer_schema
|
34
|
+
@reader_schema = reader_schema
|
35
|
+
@message = message
|
36
|
+
end
|
37
|
+
end
|
26
38
|
|
27
39
|
# Instantiate a new Messaging instance with the given configuration.
|
28
40
|
#
|
@@ -34,7 +46,7 @@ class AvroTurf
|
|
34
46
|
# namespace - The String default schema namespace.
|
35
47
|
# registry_path_prefix - The String URL path prefix used to namespace schema registry requests (optional).
|
36
48
|
# logger - The Logger that should be used to log information (optional).
|
37
|
-
#
|
49
|
+
# proxy - Forward the request via proxy (optional).
|
38
50
|
# user - User for basic auth (optional).
|
39
51
|
# password - Password for basic auth (optional).
|
40
52
|
# ssl_ca_file - Name of file containing CA certificate (optional).
|
@@ -130,7 +142,7 @@ class AvroTurf
|
|
130
142
|
writer.write(message, encoder)
|
131
143
|
|
132
144
|
stream.string
|
133
|
-
rescue Excon::
|
145
|
+
rescue Excon::Error::NotFound
|
134
146
|
if schema_id
|
135
147
|
raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
|
136
148
|
else
|
@@ -88,7 +88,7 @@ class AvroTurf::SchemaStore
|
|
88
88
|
local_schemas_cache.each do |schema_name, schema|
|
89
89
|
local_schemas_cache.delete(schema_name) unless File.exist?(build_schema_path(schema_name))
|
90
90
|
end
|
91
|
-
load_schema!(fullname,
|
91
|
+
load_schema!(fullname, @schemas.dup)
|
92
92
|
else
|
93
93
|
raise
|
94
94
|
end
|
data/lib/avro_turf/version.rb
CHANGED
data/lib/avro_turf.rb
CHANGED
@@ -16,7 +16,9 @@ end
|
|
16
16
|
|
17
17
|
class AvroTurf
|
18
18
|
class Error < StandardError; end
|
19
|
+
|
19
20
|
class SchemaError < Error; end
|
21
|
+
|
20
22
|
class SchemaNotFoundError < Error; end
|
21
23
|
|
22
24
|
DEFAULT_SCHEMAS_PATH = "./schemas"
|
@@ -31,7 +33,7 @@ class AvroTurf
|
|
31
33
|
# Currently, the only valid codec name is `deflate`.
|
32
34
|
def initialize(schemas_path: nil, schema_store: nil, namespace: nil, codec: nil)
|
33
35
|
@namespace = namespace
|
34
|
-
@schema_store = schema_store ||
|
36
|
+
@schema_store = schema_store ||
|
35
37
|
SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
|
36
38
|
@codec = codec
|
37
39
|
end
|
@@ -62,14 +64,20 @@ class AvroTurf
|
|
62
64
|
# validate - The boolean for performing complete data validation before
|
63
65
|
# encoding it, Avro::SchemaValidator::ValidationError with
|
64
66
|
# a descriptive message will be raised in case of invalid message.
|
67
|
+
# validate_options - Hash for the Avro::SchemaValidator, default
|
68
|
+
# {recursive: true, encoded: false, fail_on_extra_fields: true}
|
65
69
|
#
|
66
70
|
# Returns nothing.
|
67
|
-
def encode_to_stream(data, schema_name: nil, stream: nil, namespace: @namespace,
|
71
|
+
def encode_to_stream(data, schema_name: nil, stream: nil, namespace: @namespace,
|
72
|
+
validate: false,
|
73
|
+
validate_options: { recursive: true,
|
74
|
+
encoded: false,
|
75
|
+
fail_on_extra_fields: true })
|
68
76
|
schema = @schema_store.find(schema_name, namespace)
|
69
77
|
writer = Avro::IO::DatumWriter.new(schema)
|
70
78
|
|
71
79
|
if validate
|
72
|
-
Avro::SchemaValidator.validate!(schema, data,
|
80
|
+
Avro::SchemaValidator.validate!(schema, data, **validate_options)
|
73
81
|
end
|
74
82
|
|
75
83
|
dw = Avro::DataFile::Writer.new(stream, writer, schema, @codec)
|
data/spec/avro_turf_spec.rb
CHANGED
@@ -16,6 +16,13 @@ describe AvroTurf do
|
|
16
16
|
{
|
17
17
|
"type": "string",
|
18
18
|
"name": "full_name"
|
19
|
+
},
|
20
|
+
{
|
21
|
+
"name": "birth_date",
|
22
|
+
"type": {
|
23
|
+
"type": "int",
|
24
|
+
"logicalType": "date"
|
25
|
+
}
|
19
26
|
}
|
20
27
|
]
|
21
28
|
}
|
@@ -24,7 +31,8 @@ describe AvroTurf do
|
|
24
31
|
|
25
32
|
it "encodes data with Avro" do
|
26
33
|
data = {
|
27
|
-
"full_name" => "John Doe"
|
34
|
+
"full_name" => "John Doe",
|
35
|
+
"birth_date" => Date.new(1934, 1, 2)
|
28
36
|
}
|
29
37
|
|
30
38
|
encoded_data = avro.encode(data, schema_name: "person")
|
@@ -36,7 +44,8 @@ describe AvroTurf do
|
|
36
44
|
compressed_avro = AvroTurf.new(schemas_path: "spec/schemas/", codec: "deflate")
|
37
45
|
|
38
46
|
data = {
|
39
|
-
"full_name" => "John Doe" * 100
|
47
|
+
"full_name" => "John Doe" * 100,
|
48
|
+
"birth_date" => Date.new(1934, 1, 2)
|
40
49
|
}
|
41
50
|
|
42
51
|
uncompressed_data = avro.encode(data, schema_name: "person")
|
@@ -251,6 +260,31 @@ describe AvroTurf do
|
|
251
260
|
expect { encode_to_stream }.to raise_error(Avro::SchemaValidator::ValidationError, /extra field 'fulll_name'/)
|
252
261
|
end
|
253
262
|
end
|
263
|
+
|
264
|
+
context "when the `fail_on_extra_fields` validation option is disabled" do
|
265
|
+
let(:message) { { "full_name" => "John Doe", "first_name" => "John", "last_name" => "Doe" } }
|
266
|
+
subject(:encode_to_stream) do
|
267
|
+
stream = StringIO.new
|
268
|
+
avro.encode_to_stream(message, stream: stream, schema_name: "message",
|
269
|
+
validate: true,
|
270
|
+
validate_options: { recursive: true, encoded: false, fail_on_extra_fields: false }
|
271
|
+
)
|
272
|
+
end
|
273
|
+
|
274
|
+
it "should not raise Avro::SchemaValidator::ValidationError with a message about extra field" do
|
275
|
+
define_schema "message.avsc", <<-AVSC
|
276
|
+
{
|
277
|
+
"name": "message",
|
278
|
+
"type": "record",
|
279
|
+
"fields": [
|
280
|
+
{ "name": "full_name", "type": "string" }
|
281
|
+
]
|
282
|
+
}
|
283
|
+
AVSC
|
284
|
+
|
285
|
+
expect { encode_to_stream }.not_to raise_error
|
286
|
+
end
|
287
|
+
end
|
254
288
|
end
|
255
289
|
end
|
256
290
|
|
@@ -304,5 +338,33 @@ describe AvroTurf do
|
|
304
338
|
datum = { message: "hello" }
|
305
339
|
expect(avro.valid?(datum, schema_name: "postcard")).to eq true
|
306
340
|
end
|
341
|
+
|
342
|
+
it "handles logicalType of date in schema" do
|
343
|
+
define_schema "postcard.avsc", <<-AVSC
|
344
|
+
{
|
345
|
+
"name": "postcard",
|
346
|
+
"type": "record",
|
347
|
+
"fields": [
|
348
|
+
{
|
349
|
+
"name": "message",
|
350
|
+
"type": "string"
|
351
|
+
},
|
352
|
+
{
|
353
|
+
"name": "sent_date",
|
354
|
+
"type": {
|
355
|
+
"type": "int",
|
356
|
+
"logicalType": "date"
|
357
|
+
}
|
358
|
+
}
|
359
|
+
]
|
360
|
+
}
|
361
|
+
AVSC
|
362
|
+
|
363
|
+
datum = {
|
364
|
+
message: "hello",
|
365
|
+
sent_date: Date.new(2022, 9, 11)
|
366
|
+
}
|
367
|
+
expect(avro.valid?(datum, schema_name: "postcard")).to eq true
|
368
|
+
end
|
307
369
|
end
|
308
370
|
end
|
data/spec/core_ext/date_spec.rb
CHANGED
data/spec/schema_store_spec.rb
CHANGED
@@ -26,6 +26,33 @@ describe AvroTurf::SchemaStore do
|
|
26
26
|
expect(schema.fullname).to eq "message"
|
27
27
|
end
|
28
28
|
|
29
|
+
it "resolves missing references when nested schema is not a named type" do
|
30
|
+
define_schema "root.avsc", <<-AVSC
|
31
|
+
{
|
32
|
+
"type": "record",
|
33
|
+
"name": "root",
|
34
|
+
"fields": [
|
35
|
+
{
|
36
|
+
"type": "nested",
|
37
|
+
"name": "nested_value"
|
38
|
+
}
|
39
|
+
]
|
40
|
+
}
|
41
|
+
AVSC
|
42
|
+
|
43
|
+
define_schema "nested.avsc", <<-AVSC
|
44
|
+
{
|
45
|
+
"name": "nested",
|
46
|
+
"type": "string",
|
47
|
+
"logicalType": "uuid"
|
48
|
+
}
|
49
|
+
AVSC
|
50
|
+
|
51
|
+
schema = store.find("root")
|
52
|
+
|
53
|
+
expect(schema.fullname).to eq "root"
|
54
|
+
end
|
55
|
+
|
29
56
|
it "resolves missing references" do
|
30
57
|
define_schema "person.avsc", <<-AVSC
|
31
58
|
{
|
data/spec/spec_helper.rb
CHANGED
@@ -22,6 +22,16 @@ module Helpers
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
|
+
# The `fakefs` gem does not support `flock` on files, so it requires a patch:
|
26
|
+
# https://github.com/fakefs/fakefs/issues/433
|
27
|
+
module FakeFS
|
28
|
+
class File < StringIO
|
29
|
+
def flock(*)
|
30
|
+
true
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
25
35
|
RSpec.configure do |config|
|
26
36
|
config.include FakeFS::SpecHelpers
|
27
37
|
config.include Helpers
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: avro_turf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Schierbeck
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: avro
|
@@ -156,7 +156,7 @@ dependencies:
|
|
156
156
|
- - ">="
|
157
157
|
- !ruby/object:Gem::Version
|
158
158
|
version: '0'
|
159
|
-
description:
|
159
|
+
description:
|
160
160
|
email:
|
161
161
|
- dasch@zendesk.com
|
162
162
|
executables: []
|
@@ -251,8 +251,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
251
251
|
- !ruby/object:Gem::Version
|
252
252
|
version: '0'
|
253
253
|
requirements: []
|
254
|
-
rubygems_version: 3.
|
255
|
-
signing_key:
|
254
|
+
rubygems_version: 3.3.3
|
255
|
+
signing_key:
|
256
256
|
specification_version: 4
|
257
257
|
summary: A library that makes it easier to use the Avro serialization format from
|
258
258
|
Ruby
|