avro_turf 1.16.0 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2345cdad9c7472c5fd79aa3710150c40095ff2b970b28ea17b3db8b951fd143f
4
- data.tar.gz: 264fbec47c582106134b51fc80823f93a82c9f1053a2d61e8899eac72a27256f
3
+ metadata.gz: 22e1ca443d93f4f1c38b9626ad1a83db6352a4aac397256a8a3bfae2ca462d9c
4
+ data.tar.gz: 2621ae9378b2511cee81522247c5f4df883ee9c25efe92839cdc5af22543bc1f
5
5
  SHA512:
6
- metadata.gz: 2f17112a66c063514d56c7a4a5d785fa61e1c2079b7a02eede619b1b030997ba689b897ac8fc55f355fc781a1fd48dd77c545315478155b70853a0263fd571ac
7
- data.tar.gz: 9d34639b83ee204e15143cc073aacd24157556eaede70e855384ba43ec769c6ed12505ffa2cf6d17ae6e216095c0ad097ace8af42579efaec4fc34ad92c482fe
6
+ metadata.gz: 56f2e3885be65423da7be65fe067151b3fd270ce13af76455a0f71cccbf6d68b09661e75f882c50ea9745f9cdc21e78ce8bd527d8123fa4b0c094dfaf6baa03a
7
+ data.tar.gz: 4b8e92503870f8c83c8af5457786da3c83823706d957a96986488f2800166ae02846141e9eeb28c261c6b0e0f5d7922244f65dfb47e39bbc96a9a29fea075e55
@@ -0,0 +1,24 @@
1
+ on:
2
+ push:
3
+ branches:
4
+ - master
5
+ jobs:
6
+ push:
7
+ name: Push gem to RubyGems.org
8
+ runs-on: ubuntu-latest
9
+
10
+ permissions:
11
+ id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
12
+ contents: write # IMPORTANT: this permission is required for `rake release` to push the release tag
13
+
14
+ steps:
15
+ # Set up
16
+ - uses: actions/checkout@v4
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ bundler-cache: true
21
+ ruby-version: ruby
22
+
23
+ # Release
24
+ - uses: rubygems/release-gem@v1
data/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## v1.17.0
6
+
7
+ - Add `register_schemas` option to `encode` method [#210](https://github.com/dasch/avro_turf/pull/210)
8
+
5
9
  ## v1.16.0
6
10
 
7
11
  - Add compatibility with Avro v1.12.x.
data/README.md CHANGED
@@ -92,6 +92,9 @@ avro.encode({ "name" => "Jane", "age" => 28 }, schema_name: "person")
92
92
  # Data can be validated before encoding to get a description of problem through
93
93
  # Avro::SchemaValidator::ValidationError exception
94
94
  avro.encode({ "titl" => "hello, world" }, schema_name: "person", validate: true)
95
+
96
+ # If you do not want to register the schema in case it does not exist, you can pass the register_schemas option as false
97
+ avro.encode({ "name" => "Jane", "age" => 28 }, schema_name: "person", register_schemas: false)
95
98
  ```
96
99
 
97
100
  ### Inter-schema references
@@ -17,13 +17,17 @@ class AvroTurf::CachedConfluentSchemaRegistry
17
17
  end
18
18
 
19
19
  # Delegate the following methods to the upstream
20
- %i(subjects subject_versions schema_subject_versions check compatible?
20
+ %i(subjects subject_versions schema_subject_versions compatible?
21
21
  global_config update_global_config subject_config update_subject_config).each do |name|
22
22
  define_method(name) do |*args|
23
23
  instance_variable_get(:@upstream).send(name, *args)
24
24
  end
25
25
  end
26
26
 
27
+ def check(subject, schema)
28
+ @cache.lookup_data_by_schema(subject, schema) || @cache.store_data_by_schema(subject, schema, @upstream.check(subject, schema))
29
+ end
30
+
27
31
  def fetch(id)
28
32
  @cache.lookup_by_id(id) || @cache.store_by_id(id, @upstream.fetch(id))
29
33
  end
@@ -16,6 +16,10 @@ class AvroTurf::DiskCache
16
16
 
17
17
  @schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
18
18
  @schemas_by_subject_version = {}
19
+
20
+ @data_by_schema_path = File.join(disk_path, 'data_by_schema.json')
21
+ hash = read_from_disk_cache(@data_by_schema_path)
22
+ @data_by_schema = hash || {}
19
23
  end
20
24
 
21
25
  # override
@@ -40,6 +44,12 @@ class AvroTurf::DiskCache
40
44
  @ids_by_schema[key]
41
45
  end
42
46
 
47
+ # override to use a json serializable cache key
48
+ def lookup_data_by_schema(subject, schema)
49
+ key = "#{subject}#{schema}"
50
+ @data_by_schema[key]
51
+ end
52
+
43
53
  # override to use a json serializable cache key and update the file cache
44
54
  def store_by_schema(subject, schema, id)
45
55
  key = "#{subject}#{schema}"
@@ -49,6 +59,15 @@ class AvroTurf::DiskCache
49
59
  id
50
60
  end
51
61
 
62
+ def store_data_by_schema(subject, schema, data)
63
+ return unless data
64
+
65
+ key = "#{subject}#{schema}"
66
+ @data_by_schema[key] = data
67
+ write_to_disk_cache(@data_by_schema_path, @data_by_schema)
68
+ data
69
+ end
70
+
52
71
  # checks instance var (in-memory cache) for schema
53
72
  # checks disk cache if in-memory cache doesn't exists
54
73
  # if file exists but no in-memory cache, read from file and sync in-memory cache
@@ -1,11 +1,11 @@
1
1
  # A cache for the CachedConfluentSchemaRegistry.
2
2
  # Simply stores the schemas and ids in in-memory hashes.
3
3
  class AvroTurf::InMemoryCache
4
-
5
4
  def initialize
6
5
  @schemas_by_id = {}
7
6
  @ids_by_schema = {}
8
7
  @schema_by_subject_version = {}
8
+ @data_by_schema = {}
9
9
  end
10
10
 
11
11
  def lookup_by_id(id)
@@ -21,11 +21,23 @@ class AvroTurf::InMemoryCache
21
21
  @ids_by_schema[key]
22
22
  end
23
23
 
24
+ def lookup_data_by_schema(subject, schema)
25
+ key = [subject, schema]
26
+ @data_by_schema[key]
27
+ end
28
+
24
29
  def store_by_schema(subject, schema, id)
25
30
  key = [subject, schema]
26
31
  @ids_by_schema[key] = id
27
32
  end
28
33
 
34
+ def store_data_by_schema(subject, schema, data)
35
+ return unless data
36
+
37
+ key = [subject, schema]
38
+ @data_by_schema[key] = data
39
+ end
40
+
29
41
  def lookup_by_version(subject, version)
30
42
  key = "#{subject}#{version}"
31
43
  @schema_by_subject_version[key]
@@ -106,27 +106,33 @@ class AvroTurf
106
106
 
107
107
  # Encodes a message using the specified schema.
108
108
  #
109
- # message - The message that should be encoded. Must be compatible with
110
- # the schema.
111
- # schema_name - The String name of the schema that should be used to encode
112
- # the data.
113
- # namespace - The namespace of the schema (optional).
114
- # subject - The subject name the schema should be registered under in
115
- # the schema registry (optional).
116
- # version - The integer version of the schema that should be used to decode
117
- # the data. Must match the schema used when encoding (optional).
118
- # schema_id - The integer id of the schema that should be used to encode
119
- # the data.
120
- # validate - The boolean for performing complete message validation before
121
- # encoding it, Avro::SchemaValidator::ValidationError with
122
- # a descriptive message will be raised in case of invalid message.
109
+ # message - The message that should be encoded. Must be compatible with
110
+ # the schema.
111
+ # schema_name - The String name of the schema that should be used to encode
112
+ # the data.
113
+ # namespace - The namespace of the schema (optional).
114
+ # subject - The subject name the schema should be registered under in
115
+ # the schema registry (optional).
116
+ # version - The integer version of the schema that should be used to decode
117
+ # the data. Must match the schema used when encoding (optional).
118
+ # schema_id - The integer id of the schema that should be used to encode
119
+ # the data.
120
+ # validate - The boolean for performing complete message validation before
121
+ # encoding it, Avro::SchemaValidator::ValidationError with
122
+ # a descriptive message will be raised in case of invalid message.
123
+ # register_schemas - The boolean that indicates whether or not the schema should be
124
+ # registered in case it does not exist, or if it should be fetched
125
+ # from the registry without registering it (register_schemas: false).
123
126
  #
124
127
  # Returns the encoded data as a String.
125
- def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil, validate: false)
128
+ def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil, validate: false,
129
+ register_schemas: true)
126
130
  schema, schema_id = if schema_id
127
131
  fetch_schema_by_id(schema_id)
128
132
  elsif subject && version
129
133
  fetch_schema(subject: subject, version: version)
134
+ elsif schema_name && !register_schemas
135
+ fetch_schema_by_body(subject: subject, schema_name: schema_name, namespace: namespace)
130
136
  elsif schema_name
131
137
  register_schema(subject: subject, schema_name: schema_name, namespace: namespace)
132
138
  else
@@ -228,6 +234,14 @@ class AvroTurf
228
234
  [schema, schema_id]
229
235
  end
230
236
 
237
+ def fetch_schema_by_body(schema_name:, subject: nil, namespace: nil)
238
+ schema = @schema_store.find(schema_name, namespace)
239
+ schema_data = @registry.check(subject || schema.fullname, schema)
240
+ raise SchemaNotFoundError.new("Schema with structure: #{schema} not found on registry") unless schema_data
241
+
242
+ [schema, schema_data["id"]]
243
+ end
244
+
231
245
  # Schemas are registered under the full name of the top level Avro record
232
246
  # type, or `subject` if it's provided.
233
247
  def register_schema(schema_name:, subject: nil, namespace: nil)
@@ -64,7 +64,8 @@ class FakePrefixedConfluentSchemaRegistryServer < FakeConfluentSchemaRegistrySer
64
64
 
65
65
  # Note: this does not actually handle the same schema registered under
66
66
  # multiple subjects
67
- schema_id = SCHEMAS.index(schema)
67
+ context, _subject = parse_qualified_subject(params[:subject])
68
+ schema_id = SCHEMAS[context].index(schema)
68
69
 
69
70
  halt(404, SCHEMA_NOT_FOUND) unless schema_id
70
71
 
@@ -1,3 +1,3 @@
1
1
  class AvroTurf
2
- VERSION = "1.16.0"
2
+ VERSION = "1.17.0"
3
3
  end
@@ -6,6 +6,7 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
6
6
  let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
7
7
  let(:registry) { described_class.new(upstream) }
8
8
  let(:id) { rand(999) }
9
+ let(:subject_name) { 'a_subject' }
9
10
  let(:schema) do
10
11
  {
11
12
  type: "record",
@@ -25,8 +26,6 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
25
26
  end
26
27
 
27
28
  describe "#register" do
28
- let(:subject_name) { "a_subject" }
29
-
30
29
  it "caches the result of register" do
31
30
  # multiple calls return same result, with only one upstream call
32
31
  allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
@@ -36,8 +35,29 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
36
35
  end
37
36
  end
38
37
 
38
+ describe "#check" do
39
+ let(:schema_data) do
40
+ {
41
+ "subject" => subject_name,
42
+ "version" => 123,
43
+ "id" => id,
44
+ "schema" => schema
45
+ }
46
+ end
47
+
48
+ before do
49
+ allow(upstream).to receive(:check).with(subject_name, schema).and_return(schema_data)
50
+ end
51
+
52
+ it "caches the result of check" do
53
+ # multiple calls return same result, with only one upstream call
54
+ expect(registry.check(subject_name, schema)).to eq(schema_data)
55
+ expect(registry.check(subject_name, schema)).to eq(schema_data)
56
+ expect(upstream).to have_received(:check).exactly(1).times
57
+ end
58
+ end
59
+
39
60
  describe '#subject_version' do
40
- let(:subject_name) { 'a_subject' }
41
61
  let(:version) { 1 }
42
62
  let(:schema_with_meta) do
43
63
  {
@@ -222,6 +222,66 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
222
222
  end
223
223
  end
224
224
 
225
+ describe "#check" do
226
+ let(:city_name) { "a_city" }
227
+ let(:schema_data) do
228
+ {
229
+ "subject" => subject,
230
+ "version" => version,
231
+ "id" => id,
232
+ "schema" => schema
233
+ }
234
+ end
235
+
236
+ let(:city_schema_data) do
237
+ {
238
+ "subject" => city_name,
239
+ "version" => version,
240
+ "id" => city_id,
241
+ "schema" => city_schema
242
+ }
243
+ end
244
+
245
+ let(:cache_before) do
246
+ {
247
+ "#{subject}#{schema}" => schema_data
248
+ }
249
+ end
250
+
251
+ let(:cache_after) do
252
+ {
253
+ "#{subject}#{schema}" => schema_data,
254
+ "#{city_name}#{city_schema}" => city_schema_data
255
+ }
256
+ end
257
+
258
+ # setup the disk cache to avoid performing the upstream fetch
259
+ before do
260
+ store_cache("data_by_schema.json", cache_before)
261
+ allow(upstream).to receive(:check).with(subject, schema).and_return(schema_data)
262
+ allow(upstream).to receive(:check).with(city_name, city_schema).and_return(city_schema_data)
263
+ end
264
+
265
+ context "when the schema is not found in the cache" do
266
+ it "makes only one request using upstream" do
267
+ expect(registry.check(city_name, city_schema)).to eq(city_schema_data)
268
+ expect(registry.check(city_name, city_schema)).to eq(city_schema_data)
269
+ expect(upstream).to have_received(:check).with(city_name, city_schema).exactly(1).times
270
+ expect(load_cache("data_by_schema.json")).to eq cache_after
271
+ end
272
+ end
273
+
274
+ context "when schema is already in the cache" do
275
+ it "uses preloaded disk cache" do
276
+ # multiple calls return same result, with zero upstream calls
277
+ expect(registry.check(subject, schema)).to eq(schema_data)
278
+ expect(registry.check(subject, schema)).to eq(schema_data)
279
+ expect(upstream).to have_received(:check).exactly(0).times
280
+ expect(load_cache("data_by_schema.json")).to eq cache_before
281
+ end
282
+ end
283
+ end
284
+
225
285
  it_behaves_like "a confluent schema registry client" do
226
286
  let(:upstream) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
227
287
  let(:registry) { described_class.new(upstream) }
@@ -36,6 +36,24 @@ describe AvroTurf::Messaging do
36
36
  }
37
37
  AVSC
38
38
  end
39
+
40
+ let(:city_message) { { "name" => "Paris" } }
41
+ let(:city_schema_json) do
42
+ <<-AVSC
43
+ {
44
+ "name": "city",
45
+ "type": "record",
46
+ "fields": [
47
+ {
48
+ "type": "string",
49
+ "name": "name"
50
+ }
51
+ ]
52
+ }
53
+ AVSC
54
+ end
55
+
56
+ let(:city_schema) { Avro::Schema.parse(city_schema_json) }
39
57
  let(:schema) { Avro::Schema.parse(schema_json) }
40
58
 
41
59
  before do
@@ -49,6 +67,7 @@ describe AvroTurf::Messaging do
49
67
 
50
68
  before do
51
69
  define_schema "person.avsc", schema_json
70
+ define_schema "city.avsc", city_schema_json
52
71
  end
53
72
 
54
73
  shared_examples_for "encoding and decoding with the schema from schema store" do
@@ -92,6 +111,16 @@ describe AvroTurf::Messaging do
92
111
  expect { avro.encode(message, subject: 'missing', version: 1) }.to raise_error(AvroTurf::SchemaNotFoundError)
93
112
  end
94
113
 
114
+ it 'raises AvroTurf::SchemaNotFoundError when the schema does not exist on registry and register_schemas false' do
115
+ expect { avro.encode(city_message, schema_name: 'city', register_schemas: false) }.
116
+ to raise_error(AvroTurf::SchemaNotFoundError, "Schema with structure: #{city_schema} not found on registry")
117
+ end
118
+
119
+ it 'encodes with register_schemas false when the schema exists on the registry' do
120
+ data = avro.encode(message, schema_name: 'person', register_schemas: false)
121
+ expect(avro.decode(data, schema_name: 'person')).to eq message
122
+ end
123
+
95
124
  it 'caches parsed schemas for decoding' do
96
125
  data = avro.encode(message, subject: 'person', version: 1)
97
126
  avro.decode(data)
@@ -364,6 +393,34 @@ describe AvroTurf::Messaging do
364
393
  end
365
394
  end
366
395
 
396
+ context 'using fetch_schema_by_body' do
397
+ let(:subject_name) { 'city' }
398
+ let(:schema_name) { 'city' }
399
+ let(:namespace) { 'namespace' }
400
+ let(:city_schema_id) { 125 }
401
+ let(:city_schema_data) do
402
+ {
403
+ "subject" => subject_name,
404
+ "version" => 123,
405
+ "id" => city_schema_id,
406
+ "schema" => city_schema
407
+ }
408
+ end
409
+
410
+ subject(:fetch_schema_by_body) do
411
+ avro.fetch_schema_by_body(schema_name: schema_name, namespace: namespace, subject: subject_name)
412
+ end
413
+
414
+ before do
415
+ allow(schema_store).to receive(:find).with(schema_name, namespace).and_return(city_schema)
416
+ allow(registry).to receive(:check).with(subject_name, city_schema).and_return(city_schema_data)
417
+ end
418
+
419
+ it 'gets schema from registry' do
420
+ expect(fetch_schema_by_body).to eq([city_schema, city_schema_id])
421
+ end
422
+ end
423
+
367
424
  context 'using register_schema' do
368
425
  let(:schema_name) { 'schema_name' }
369
426
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro_turf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.0
4
+ version: 1.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-12 00:00:00.000000000 Z
11
+ date: 2024-08-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: avro
@@ -170,13 +170,14 @@ dependencies:
170
170
  - - ">="
171
171
  - !ruby/object:Gem::Version
172
172
  version: '0'
173
- description:
173
+ description:
174
174
  email:
175
175
  - dasch@zendesk.com
176
176
  executables: []
177
177
  extensions: []
178
178
  extra_rdoc_files: []
179
179
  files:
180
+ - ".github/workflows/push_gem.yml"
180
181
  - ".github/workflows/ruby.yml"
181
182
  - ".github/workflows/stale.yml"
182
183
  - ".gitignore"
@@ -265,8 +266,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
265
266
  - !ruby/object:Gem::Version
266
267
  version: '0'
267
268
  requirements: []
268
- rubygems_version: 3.4.10
269
- signing_key:
269
+ rubygems_version: 3.5.11
270
+ signing_key:
270
271
  specification_version: 4
271
272
  summary: A library that makes it easier to use the Avro serialization format from
272
273
  Ruby