avro_turf 1.6.0 → 1.8.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: db17e2aed8677c0288cd1c817b2e4ecae50da47255663c2b0a75d64a43e91152
- data.tar.gz: e88f6481337b05b80db9710283daf20818fcad9621c09ca9ebd75e533ca5135c
+ metadata.gz: 67ff8ee9b578227cd54a33d76a95fed8bd2c9a396aa360ad9df9f52d01e5c25a
+ data.tar.gz: 8bca24236ce42f19f6db457ad13d76d33e89b18703dc0eb74a8adca53b2c7b6e
  SHA512:
- metadata.gz: 3c226b186752b4feed4519d329e7789dc02ad29793d3573ca2832a182074010652c16746308f785320018bed528d00b241c9b6354b2a3271a72278812d552d56
- data.tar.gz: c07515285fbd182752764d43c350d529f9a60bce1ad7d7aa6992cb1cda9fb958c189092d78a432a6e141eca380c8e9459857b29e2b038f03e32b645f5b4f0461
+ metadata.gz: 859e8fa6938679007704d35b1b4a54616da7637c83f41c58cbd10828216e0af198846e755334f465613ceb9086ec0c63a823dc2cbf3ddbdc191a97423d2e3147
+ data.tar.gz: 7900a85abaf9dd55ac3b89bba2d04935ddbafc850a92fb1941ff8984df864cfaa4bb5527ca4d48a74ef3715915d598044b7c486363909f2705ece83b0ffca50f
data/CHANGELOG.md CHANGED
@@ -2,6 +2,16 @@

  ## Unreleased

+ ## v1.8.0
+
+ - Add support for `Date` via the appropriate logicalType definition. This is a backwards-incompatible change (#177)
+ - Fixed schema file cache truncation when multiple running instances access the cache files in parallel.
+
+ ## v1.7.0
+
+ - Added extra options for validating the message against its schema before encoding (#169)
+ - Fix infinite retry when loading schema with nested primary type in separate file (#165)
+
  ## v1.6.0

  - Schema registry path prefix (#162)
@@ -55,7 +65,7 @@
  ## v0.9.0

  - Compatibility with Avro v1.9.0 (#94)
- - Disable the auto registeration of schema (#95)
+ - Disable the auto registration of schema (#95)
  - abstracted caching from CachedConfluentSchemaRegistry (#74)
  - Load avro-patches if installed to silence deprecation errors (#85)
  - Make schema store to be thread safe (#92)
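The v1.8.0 `Date` entry ties together several hunks below: `Date#as_avro` now returns the `Date` itself, and the specs encode it against an `int`/`logicalType: date` field. A minimal usage sketch under that assumption — the `./schemas/person.avsc` file and its fields are placeholders mirroring the spec added further down, not something this diff ships:

```ruby
require "avro_turf"
require "date"

# Assumes ./schemas/person.avsc declares "full_name" (string) and
# "birth_date" as { "type": "int", "logicalType": "date" }.
avro = AvroTurf.new(schemas_path: "./schemas")

data = {
  "full_name"  => "John Doe",
  "birth_date" => Date.new(1934, 1, 2) # passed through as a Date, no manual #iso8601
}

encoded = avro.encode(data, schema_name: "person")
```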
@@ -13,7 +13,7 @@ class AvroTurf::CachedConfluentSchemaRegistry
  # cache - Optional user provided Cache object that responds to all methods in the AvroTurf::InMemoryCache interface.
  def initialize(upstream, cache: nil)
  @upstream = upstream
- @cache = cache || AvroTurf::InMemoryCache.new()
+ @cache = cache || AvroTurf::InMemoryCache.new
  end

  # Delegate the following methods to the upstream
@@ -34,7 +34,7 @@ class AvroTurf::CachedConfluentSchemaRegistry

  def subject_version(subject, version = 'latest')
  return @upstream.subject_version(subject, version) if version == 'latest'
-
+
  @cache.lookup_by_version(subject, version) ||
  @cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
  end
@@ -1,5 +1,5 @@
  class Date
  def as_avro
- iso8601
+ self
  end
  end
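This hunk is the backwards-incompatible piece called out in the v1.8.0 CHANGELOG entry. A quick before/after sketch of the behavioral difference, with return values inferred from the two `as_avro` definitions above:

```ruby
require "avro_turf"
require "date"

date = Date.new(1934, 1, 2)

# avro_turf 1.6.0: Date#as_avro called #iso8601, so dates serialized as strings
# and schemas could declare the field as { "type": "string" }.
date.as_avro # => "1934-01-02"

# avro_turf 1.8.0: Date#as_avro returns self, so the field should be declared as
# { "type": "int", "logicalType": "date" }; call #iso8601 explicitly to keep strings.
date.as_avro # => #<Date: 1934-01-02 ...>
date.iso8601 # => "1934-01-02"
```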
@@ -1,20 +1,18 @@
  # A cache for the CachedConfluentSchemaRegistry.
  # Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
- class AvroTurf::DiskCache < AvroTurf::InMemoryCache
+ class AvroTurf::DiskCache

  def initialize(disk_path, logger: Logger.new($stdout))
- super()
-
  @logger = logger

  # load the write-thru cache on startup, if it exists
  @schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
  hash = read_from_disk_cache(@schemas_by_id_path)
- @schemas_by_id = hash if hash
+ @schemas_by_id = hash || {}

  @ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
  hash = read_from_disk_cache(@ids_by_schema_path)
- @ids_by_schema = hash if hash
+ @ids_by_schema = hash || {}

  @schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
  @schemas_by_subject_version = {}
@@ -24,15 +22,16 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
  # the write-thru cache (json) does not store keys in numeric format
  # so, convert id to a string for caching purposes
  def lookup_by_id(id)
- super(id.to_s)
+ @schemas_by_id[id.to_s]
  end

  # override to include write-thru cache after storing result from upstream
  def store_by_id(id, schema)
  # must return the value from storing the result (i.e. do not return result from file write)
- value = super(id.to_s, schema)
- File.write(@schemas_by_id_path, JSON.pretty_generate(@schemas_by_id))
- return value
+ @schemas_by_id[id.to_s] = schema
+ write_to_disk_cache(@schemas_by_id_path, @schemas_by_id)
+
+ schema
  end

  # override to use a json serializable cache key
@@ -45,7 +44,8 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
  def store_by_schema(subject, schema, id)
  key = "#{subject}#{schema}"
  @ids_by_schema[key] = id
- File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
+
+ write_to_disk_cache(@ids_by_schema_path, @ids_by_schema)
  id
  end

@@ -78,7 +78,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
  hash[key] = schema
  hash
  else
- {key => schema}
+ { key => schema }
  end

  write_to_disk_cache(@schemas_by_subject_version_path, hash)
@@ -90,17 +90,27 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
  # Parse the file from disk, if it exists and is not zero length
  private def read_from_disk_cache(path)
  if File.exist?(path)
- if File.size(path)!=0
- return JSON.parse(File.read(path))
+ if File.size(path) != 0
+ json_data = File.open(path, 'r') do |file|
+ file.flock(File::LOCK_SH)
+ file.read
+ end
+
+ return JSON.parse(json_data)
  else
  # just log a message if skipping zero length file
  @logger.warn "skipping JSON.parse of zero length file at #{path}"
  end
  end
- return nil
+
+ nil
  end

  private def write_to_disk_cache(path, hash)
- File.write(path, JSON.pretty_generate(hash))
+ # don't use "w" because it truncates the file before lock
+ File.open(path, File::RDWR | File::CREAT, 0644) do |file|
+ file.flock(File::LOCK_EX)
+ file.write(JSON.pretty_generate(hash))
+ end
  end
  end
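The `flock` changes above matter because `DiskCache` is the kind of user-supplied cache that `CachedConfluentSchemaRegistry` accepts via its `cache:` argument, so several processes may share one cache directory. A rough wiring sketch under that assumption — the registry URL and paths are placeholders, not part of this diff:

```ruby
require "avro_turf/messaging"
require "avro_turf/disk_cache"

# Back the cached registry with the flock-protected JSON files on disk.
registry = AvroTurf::CachedConfluentSchemaRegistry.new(
  AvroTurf::ConfluentSchemaRegistry.new("http://localhost:8081"),
  cache: AvroTurf::DiskCache.new("/var/cache/avro_turf")
)

avro = AvroTurf::Messaging.new(registry: registry, schemas_path: "./schemas")
```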
@@ -21,8 +21,20 @@ class AvroTurf
  # 1: https://github.com/confluentinc/schema-registry
  class Messaging
  MAGIC_BYTE = [0].pack("C").freeze
- DecodedMessage = Struct.new(:schema_id, :writer_schema, :reader_schema, :message)
- private_constant(:DecodedMessage)
+
+ class DecodedMessage
+ attr_reader :schema_id
+ attr_reader :writer_schema
+ attr_reader :reader_schema
+ attr_reader :message
+
+ def initialize(schema_id, writer_schema, reader_schema, message)
+ @schema_id = schema_id
+ @writer_schema = writer_schema
+ @reader_schema = reader_schema
+ @message = message
+ end
+ end

  # Instantiate a new Messaging instance with the given configuration.
  #
@@ -34,7 +46,7 @@ class AvroTurf
  # namespace - The String default schema namespace.
  # registry_path_prefix - The String URL path prefix used to namespace schema registry requests (optional).
  # logger - The Logger that should be used to log information (optional).
- # proxy - Forward the request via proxy (optional).
+ # proxy - Forward the request via proxy (optional).
  # user - User for basic auth (optional).
  # password - Password for basic auth (optional).
  # ssl_ca_file - Name of file containing CA certificate (optional).
@@ -130,7 +142,7 @@ class AvroTurf
  writer.write(message, encoder)

  stream.string
- rescue Excon::Errors::NotFound
+ rescue Excon::Error::NotFound
  if schema_id
  raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
  else
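With the `Struct` replaced by a plain class and the `private_constant` call dropped, the decoded result's readers are now part of the public surface. A hedged sketch of reaching them through `Messaging#decode_message` — the registry URL and the local `person.avsc` schema are assumptions:

```ruby
require "avro_turf/messaging"

avro = AvroTurf::Messaging.new(
  registry_url: "http://localhost:8081",
  schemas_path: "./schemas" # assumed to contain person.avsc
)

payload = avro.encode({ "full_name" => "John Doe" }, schema_name: "person")
result  = avro.decode_message(payload)

result.schema_id     # Integer id assigned by the schema registry
result.writer_schema # Avro::Schema the producer registered
result.reader_schema # Avro::Schema used for decoding when a schema_name is passed
result.message       # the decoded Hash
```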
@@ -88,7 +88,7 @@ class AvroTurf::SchemaStore
  local_schemas_cache.each do |schema_name, schema|
  local_schemas_cache.delete(schema_name) unless File.exist?(build_schema_path(schema_name))
  end
- load_schema!(fullname, local_schemas_cache)
+ load_schema!(fullname, @schemas.dup)
  else
  raise
  end
@@ -1,3 +1,3 @@
  class AvroTurf
- VERSION = "1.6.0"
+ VERSION = "1.8.0"
  end
data/lib/avro_turf.rb CHANGED
@@ -16,7 +16,9 @@ end

  class AvroTurf
  class Error < StandardError; end
+
  class SchemaError < Error; end
+
  class SchemaNotFoundError < Error; end

  DEFAULT_SCHEMAS_PATH = "./schemas"
@@ -31,7 +33,7 @@ class AvroTurf
  # Currently, the only valid codec name is `deflate`.
  def initialize(schemas_path: nil, schema_store: nil, namespace: nil, codec: nil)
  @namespace = namespace
- @schema_store = schema_store ||
+ @schema_store = schema_store ||
  SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
  @codec = codec
  end
@@ -62,14 +64,20 @@ class AvroTurf
  # validate - The boolean for performing complete data validation before
  # encoding it, Avro::SchemaValidator::ValidationError with
  # a descriptive message will be raised in case of invalid message.
+ # validate_options - Hash for the Avro::SchemaValidator, default
+ # {recursive: true, encoded: false, fail_on_extra_fields: true}
  #
  # Returns nothing.
- def encode_to_stream(data, schema_name: nil, stream: nil, namespace: @namespace, validate: false)
+ def encode_to_stream(data, schema_name: nil, stream: nil, namespace: @namespace,
+ validate: false,
+ validate_options: { recursive: true,
+ encoded: false,
+ fail_on_extra_fields: true })
  schema = @schema_store.find(schema_name, namespace)
  writer = Avro::IO::DatumWriter.new(schema)

  if validate
- Avro::SchemaValidator.validate!(schema, data, recursive: true, encoded: false, fail_on_extra_fields: true)
+ Avro::SchemaValidator.validate!(schema, data, **validate_options)
  end

  dw = Avro::DataFile::Writer.new(stream, writer, schema, @codec)
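The new `validate_options` hash is forwarded straight to `Avro::SchemaValidator.validate!`, with the previously hard-coded options as its default. A small sketch of relaxing `fail_on_extra_fields`, mirroring the spec added below (the local `message` schema is an assumption):

```ruby
require "avro_turf"
require "stringio"

avro   = AvroTurf.new(schemas_path: "./schemas")
stream = StringIO.new

# Extra keys no longer fail validation because fail_on_extra_fields is disabled;
# the writer still emits only the fields declared by the schema (here, full_name).
avro.encode_to_stream(
  { "full_name" => "John Doe", "nickname" => "JD" },
  schema_name: "message",
  stream: stream,
  validate: true,
  validate_options: { recursive: true, encoded: false, fail_on_extra_fields: false }
)
```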
@@ -16,6 +16,13 @@ describe AvroTurf do
  {
  "type": "string",
  "name": "full_name"
+ },
+ {
+ "name": "birth_date",
+ "type": {
+ "type": "int",
+ "logicalType": "date"
+ }
  }
  ]
  }
@@ -24,7 +31,8 @@ describe AvroTurf do

  it "encodes data with Avro" do
  data = {
- "full_name" => "John Doe"
+ "full_name" => "John Doe",
+ "birth_date" => Date.new(1934, 1, 2)
  }

  encoded_data = avro.encode(data, schema_name: "person")
@@ -36,7 +44,8 @@ describe AvroTurf do
  compressed_avro = AvroTurf.new(schemas_path: "spec/schemas/", codec: "deflate")

  data = {
- "full_name" => "John Doe" * 100
+ "full_name" => "John Doe" * 100,
+ "birth_date" => Date.new(1934, 1, 2)
  }

  uncompressed_data = avro.encode(data, schema_name: "person")
@@ -251,6 +260,31 @@ describe AvroTurf do
  expect { encode_to_stream }.to raise_error(Avro::SchemaValidator::ValidationError, /extra field 'fulll_name'/)
  end
  end
+
+ context "when the `fail_on_extra_fields` validation option is disabled" do
+ let(:message) { { "full_name" => "John Doe", "first_name" => "John", "last_name" => "Doe" } }
+ subject(:encode_to_stream) do
+ stream = StringIO.new
+ avro.encode_to_stream(message, stream: stream, schema_name: "message",
+ validate: true,
+ validate_options: { recursive: true, encoded: false, fail_on_extra_fields: false }
+ )
+ end
+
+ it "should not raise Avro::SchemaValidator::ValidationError with a message about extra field" do
+ define_schema "message.avsc", <<-AVSC
+ {
+ "name": "message",
+ "type": "record",
+ "fields": [
+ { "name": "full_name", "type": "string" }
+ ]
+ }
+ AVSC
+
+ expect { encode_to_stream }.not_to raise_error
+ end
+ end
  end
  end

@@ -304,5 +338,33 @@ describe AvroTurf do
  datum = { message: "hello" }
  expect(avro.valid?(datum, schema_name: "postcard")).to eq true
  end
+
+ it "handles logicalType of date in schema" do
+ define_schema "postcard.avsc", <<-AVSC
+ {
+ "name": "postcard",
+ "type": "record",
+ "fields": [
+ {
+ "name": "message",
+ "type": "string"
+ },
+ {
+ "name": "sent_date",
+ "type": {
+ "type": "int",
+ "logicalType": "date"
+ }
+ }
+ ]
+ }
+ AVSC
+
+ datum = {
+ message: "hello",
+ sent_date: Date.new(2022, 9, 11)
+ }
+ expect(avro.valid?(datum, schema_name: "postcard")).to eq true
+ end
  end
  end
@@ -1,6 +1,6 @@
  describe Date, "#as_avro" do
- it "returns an ISO8601 string describing the time" do
+ it "returns Date object describing the time" do
  date = Date.today
- expect(date.as_avro).to eq(date.iso8601)
+ expect(date.as_avro).to eq(date)
  end
  end
@@ -26,6 +26,33 @@ describe AvroTurf::SchemaStore do
  expect(schema.fullname).to eq "message"
  end

+ it "resolves missing references when nested schema is not a named type" do
+ define_schema "root.avsc", <<-AVSC
+ {
+ "type": "record",
+ "name": "root",
+ "fields": [
+ {
+ "type": "nested",
+ "name": "nested_value"
+ }
+ ]
+ }
+ AVSC
+
+ define_schema "nested.avsc", <<-AVSC
+ {
+ "name": "nested",
+ "type": "string",
+ "logicalType": "uuid"
+ }
+ AVSC
+
+ schema = store.find("root")
+
+ expect(schema.fullname).to eq "root"
+ end
+
  it "resolves missing references" do
  define_schema "person.avsc", <<-AVSC
  {
data/spec/spec_helper.rb CHANGED
@@ -22,6 +22,16 @@ module Helpers
  end
  end

+ # gem `fakefs` does not support flock for the file, and require patch
+ # https://github.com/fakefs/fakefs/issues/433
+ module FakeFS
+ class File < StringIO
+ def flock(*)
+ true
+ end
+ end
+ end
+
  RSpec.configure do |config|
  config.include FakeFS::SpecHelpers
  config.include Helpers
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: avro_turf
  version: !ruby/object:Gem::Version
- version: 1.6.0
+ version: 1.8.0
  platform: ruby
  authors:
  - Daniel Schierbeck
- autorequire:
+ autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-06-24 00:00:00.000000000 Z
+ date: 2022-11-14 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: avro
@@ -156,7 +156,7 @@ dependencies:
  - - ">="
  - !ruby/object:Gem::Version
  version: '0'
- description:
+ description:
  email:
  - dasch@zendesk.com
  executables: []
@@ -251,8 +251,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.1.2
- signing_key:
+ rubygems_version: 3.3.3
+ signing_key:
  specification_version: 4
  summary: A library that makes it easier to use the Avro serialization format from
  Ruby