avro_turf 1.16.0 → 1.17.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2345cdad9c7472c5fd79aa3710150c40095ff2b970b28ea17b3db8b951fd143f
4
- data.tar.gz: 264fbec47c582106134b51fc80823f93a82c9f1053a2d61e8899eac72a27256f
3
+ metadata.gz: 22e1ca443d93f4f1c38b9626ad1a83db6352a4aac397256a8a3bfae2ca462d9c
4
+ data.tar.gz: 2621ae9378b2511cee81522247c5f4df883ee9c25efe92839cdc5af22543bc1f
5
5
  SHA512:
6
- metadata.gz: 2f17112a66c063514d56c7a4a5d785fa61e1c2079b7a02eede619b1b030997ba689b897ac8fc55f355fc781a1fd48dd77c545315478155b70853a0263fd571ac
7
- data.tar.gz: 9d34639b83ee204e15143cc073aacd24157556eaede70e855384ba43ec769c6ed12505ffa2cf6d17ae6e216095c0ad097ace8af42579efaec4fc34ad92c482fe
6
+ metadata.gz: 56f2e3885be65423da7be65fe067151b3fd270ce13af76455a0f71cccbf6d68b09661e75f882c50ea9745f9cdc21e78ce8bd527d8123fa4b0c094dfaf6baa03a
7
+ data.tar.gz: 4b8e92503870f8c83c8af5457786da3c83823706d957a96986488f2800166ae02846141e9eeb28c261c6b0e0f5d7922244f65dfb47e39bbc96a9a29fea075e55
@@ -0,0 +1,24 @@
1
+ on:
2
+ push:
3
+ branches:
4
+ - master
5
+ jobs:
6
+ push:
7
+ name: Push gem to RubyGems.org
8
+ runs-on: ubuntu-latest
9
+
10
+ permissions:
11
+ id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
12
+ contents: write # IMPORTANT: this permission is required for `rake release` to push the release tag
13
+
14
+ steps:
15
+ # Set up
16
+ - uses: actions/checkout@v4
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ bundler-cache: true
21
+ ruby-version: ruby
22
+
23
+ # Release
24
+ - uses: rubygems/release-gem@v1
data/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## v1.17.0
6
+
7
+ - Add `register_schemas` option to `encode` method [#210](https://github.com/dasch/avro_turf/pull/210)
8
+
5
9
  ## v1.16.0
6
10
 
7
11
  - Add compatibility with Avro v1.12.x.
data/README.md CHANGED
@@ -92,6 +92,9 @@ avro.encode({ "name" => "Jane", "age" => 28 }, schema_name: "person")
92
92
  # Data can be validated before encoding to get a description of the problem through
93
93
  # Avro::SchemaValidator::ValidationError exception
94
94
  avro.encode({ "titl" => "hello, world" }, schema_name: "person", validate: true)
95
+
96
+ # If you do not want the schema to be registered when it does not already exist in the registry, pass register_schemas: false (a missing schema then raises AvroTurf::SchemaNotFoundError)
97
+ avro.encode({ "name" => "Jane", "age" => 28 }, schema_name: "person", register_schemas: false)
95
98
  ```
96
99
 
97
100
  ### Inter-schema references
@@ -17,13 +17,17 @@ class AvroTurf::CachedConfluentSchemaRegistry
17
17
  end
18
18
 
19
19
  # Delegate the following methods to the upstream
20
- %i(subjects subject_versions schema_subject_versions check compatible?
20
+ %i(subjects subject_versions schema_subject_versions compatible?
21
21
  global_config update_global_config subject_config update_subject_config).each do |name|
22
22
  define_method(name) do |*args|
23
23
  instance_variable_get(:@upstream).send(name, *args)
24
24
  end
25
25
  end
26
26
 
27
+ def check(subject, schema)
28
+ @cache.lookup_data_by_schema(subject, schema) || @cache.store_data_by_schema(subject, schema, @upstream.check(subject, schema))
29
+ end
30
+
27
31
  def fetch(id)
28
32
  @cache.lookup_by_id(id) || @cache.store_by_id(id, @upstream.fetch(id))
29
33
  end
@@ -16,6 +16,10 @@ class AvroTurf::DiskCache
16
16
 
17
17
  @schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
18
18
  @schemas_by_subject_version = {}
19
+
20
+ @data_by_schema_path = File.join(disk_path, 'data_by_schema.json')
21
+ hash = read_from_disk_cache(@data_by_schema_path)
22
+ @data_by_schema = hash || {}
19
23
  end
20
24
 
21
25
  # override
@@ -40,6 +44,12 @@ class AvroTurf::DiskCache
40
44
  @ids_by_schema[key]
41
45
  end
42
46
 
47
+ # override to use a json serializable cache key
48
+ def lookup_data_by_schema(subject, schema)
49
+ key = "#{subject}#{schema}"
50
+ @data_by_schema[key]
51
+ end
52
+
43
53
  # override to use a json serializable cache key and update the file cache
44
54
  def store_by_schema(subject, schema, id)
45
55
  key = "#{subject}#{schema}"
@@ -49,6 +59,15 @@ class AvroTurf::DiskCache
49
59
  id
50
60
  end
51
61
 
62
+ def store_data_by_schema(subject, schema, data)
63
+ return unless data
64
+
65
+ key = "#{subject}#{schema}"
66
+ @data_by_schema[key] = data
67
+ write_to_disk_cache(@data_by_schema_path, @data_by_schema)
68
+ data
69
+ end
70
+
52
71
  # checks instance var (in-memory cache) for schema
53
72
  # checks disk cache if in-memory cache doesn't exist
54
73
  # if file exists but no in-memory cache, read from file and sync in-memory cache
@@ -1,11 +1,11 @@
1
1
  # A cache for the CachedConfluentSchemaRegistry.
2
2
  # Simply stores the schemas and ids in in-memory hashes.
3
3
  class AvroTurf::InMemoryCache
4
-
5
4
  def initialize
6
5
  @schemas_by_id = {}
7
6
  @ids_by_schema = {}
8
7
  @schema_by_subject_version = {}
8
+ @data_by_schema = {}
9
9
  end
10
10
 
11
11
  def lookup_by_id(id)
@@ -21,11 +21,23 @@ class AvroTurf::InMemoryCache
21
21
  @ids_by_schema[key]
22
22
  end
23
23
 
24
+ def lookup_data_by_schema(subject, schema)
25
+ key = [subject, schema]
26
+ @data_by_schema[key]
27
+ end
28
+
24
29
  def store_by_schema(subject, schema, id)
25
30
  key = [subject, schema]
26
31
  @ids_by_schema[key] = id
27
32
  end
28
33
 
34
+ def store_data_by_schema(subject, schema, data)
35
+ return unless data
36
+
37
+ key = [subject, schema]
38
+ @data_by_schema[key] = data
39
+ end
40
+
29
41
  def lookup_by_version(subject, version)
30
42
  key = "#{subject}#{version}"
31
43
  @schema_by_subject_version[key]
@@ -106,27 +106,33 @@ class AvroTurf
106
106
 
107
107
  # Encodes a message using the specified schema.
108
108
  #
109
- # message - The message that should be encoded. Must be compatible with
110
- # the schema.
111
- # schema_name - The String name of the schema that should be used to encode
112
- # the data.
113
- # namespace - The namespace of the schema (optional).
114
- # subject - The subject name the schema should be registered under in
115
- # the schema registry (optional).
116
- # version - The integer version of the schema that should be used to decode
117
- # the data. Must match the schema used when encoding (optional).
118
- # schema_id - The integer id of the schema that should be used to encode
119
- # the data.
120
- # validate - The boolean for performing complete message validation before
121
- # encoding it, Avro::SchemaValidator::ValidationError with
122
- # a descriptive message will be raised in case of invalid message.
109
+ # message - The message that should be encoded. Must be compatible with
110
+ # the schema.
111
+ # schema_name - The String name of the schema that should be used to encode
112
+ # the data.
113
+ # namespace - The namespace of the schema (optional).
114
+ # subject - The subject name the schema should be registered under in
115
+ # the schema registry (optional).
116
+ # version - The integer version of the schema that should be used to decode
117
+ # the data. Must match the schema used when encoding (optional).
118
+ # schema_id - The integer id of the schema that should be used to encode
119
+ # the data.
120
+ # validate - The boolean for performing complete message validation before
121
+ # encoding it, Avro::SchemaValidator::ValidationError with
122
+ # a descriptive message will be raised in case of invalid message.
123
+ # register_schemas - The boolean that indicates whether the schema should be registered
124
+ # when it does not exist yet (true, the default), or only looked up in
125
+ # the registry without registering it (false).
123
126
  #
124
127
  # Returns the encoded data as a String.
125
- def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil, validate: false)
128
+ def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil, validate: false,
129
+ register_schemas: true)
126
130
  schema, schema_id = if schema_id
127
131
  fetch_schema_by_id(schema_id)
128
132
  elsif subject && version
129
133
  fetch_schema(subject: subject, version: version)
134
+ elsif schema_name && !register_schemas
135
+ fetch_schema_by_body(subject: subject, schema_name: schema_name, namespace: namespace)
130
136
  elsif schema_name
131
137
  register_schema(subject: subject, schema_name: schema_name, namespace: namespace)
132
138
  else
@@ -228,6 +234,14 @@ class AvroTurf
228
234
  [schema, schema_id]
229
235
  end
230
236
 
237
+ def fetch_schema_by_body(schema_name:, subject: nil, namespace: nil)
238
+ schema = @schema_store.find(schema_name, namespace)
239
+ schema_data = @registry.check(subject || schema.fullname, schema)
240
+ raise SchemaNotFoundError.new("Schema with structure: #{schema} not found on registry") unless schema_data
241
+
242
+ [schema, schema_data["id"]]
243
+ end
244
+
231
245
  # Schemas are registered under the full name of the top level Avro record
232
246
  # type, or `subject` if it's provided.
233
247
  def register_schema(schema_name:, subject: nil, namespace: nil)
@@ -64,7 +64,8 @@ class FakePrefixedConfluentSchemaRegistryServer < FakeConfluentSchemaRegistrySer
64
64
 
65
65
  # Note: this does not actually handle the same schema registered under
66
66
  # multiple subjects
67
- schema_id = SCHEMAS.index(schema)
67
+ context, _subject = parse_qualified_subject(params[:subject])
68
+ schema_id = SCHEMAS[context].index(schema)
68
69
 
69
70
  halt(404, SCHEMA_NOT_FOUND) unless schema_id
70
71
 
@@ -1,3 +1,3 @@
1
1
  class AvroTurf
2
- VERSION = "1.16.0"
2
+ VERSION = "1.17.0"
3
3
  end
@@ -6,6 +6,7 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
6
6
  let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
7
7
  let(:registry) { described_class.new(upstream) }
8
8
  let(:id) { rand(999) }
9
+ let(:subject_name) { 'a_subject' }
9
10
  let(:schema) do
10
11
  {
11
12
  type: "record",
@@ -25,8 +26,6 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
25
26
  end
26
27
 
27
28
  describe "#register" do
28
- let(:subject_name) { "a_subject" }
29
-
30
29
  it "caches the result of register" do
31
30
  # multiple calls return same result, with only one upstream call
32
31
  allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
@@ -36,8 +35,29 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
36
35
  end
37
36
  end
38
37
 
38
+ describe "#check" do
39
+ let(:schema_data) do
40
+ {
41
+ "subject" => subject_name,
42
+ "version" => 123,
43
+ "id" => id,
44
+ "schema" => schema
45
+ }
46
+ end
47
+
48
+ before do
49
+ allow(upstream).to receive(:check).with(subject_name, schema).and_return(schema_data)
50
+ end
51
+
52
+ it "caches the result of check" do
53
+ # multiple calls return same result, with only one upstream call
54
+ expect(registry.check(subject_name, schema)).to eq(schema_data)
55
+ expect(registry.check(subject_name, schema)).to eq(schema_data)
56
+ expect(upstream).to have_received(:check).exactly(1).times
57
+ end
58
+ end
59
+
39
60
  describe '#subject_version' do
40
- let(:subject_name) { 'a_subject' }
41
61
  let(:version) { 1 }
42
62
  let(:schema_with_meta) do
43
63
  {
@@ -222,6 +222,66 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
222
222
  end
223
223
  end
224
224
 
225
+ describe "#check" do
226
+ let(:city_name) { "a_city" }
227
+ let(:schema_data) do
228
+ {
229
+ "subject" => subject,
230
+ "version" => version,
231
+ "id" => id,
232
+ "schema" => schema
233
+ }
234
+ end
235
+
236
+ let(:city_schema_data) do
237
+ {
238
+ "subject" => city_name,
239
+ "version" => version,
240
+ "id" => city_id,
241
+ "schema" => city_schema
242
+ }
243
+ end
244
+
245
+ let(:cache_before) do
246
+ {
247
+ "#{subject}#{schema}" => schema_data
248
+ }
249
+ end
250
+
251
+ let(:cache_after) do
252
+ {
253
+ "#{subject}#{schema}" => schema_data,
254
+ "#{city_name}#{city_schema}" => city_schema_data
255
+ }
256
+ end
257
+
258
+ # set up the disk cache to avoid performing the upstream check
259
+ before do
260
+ store_cache("data_by_schema.json", cache_before)
261
+ allow(upstream).to receive(:check).with(subject, schema).and_return(schema_data)
262
+ allow(upstream).to receive(:check).with(city_name, city_schema).and_return(city_schema_data)
263
+ end
264
+
265
+ context "when the schema is not found in the cache" do
266
+ it "makes only one request using upstream" do
267
+ expect(registry.check(city_name, city_schema)).to eq(city_schema_data)
268
+ expect(registry.check(city_name, city_schema)).to eq(city_schema_data)
269
+ expect(upstream).to have_received(:check).with(city_name, city_schema).exactly(1).times
270
+ expect(load_cache("data_by_schema.json")).to eq cache_after
271
+ end
272
+ end
273
+
274
+ context "when schema is already in the cache" do
275
+ it "uses preloaded disk cache" do
276
+ # multiple calls return same result, with zero upstream calls
277
+ expect(registry.check(subject, schema)).to eq(schema_data)
278
+ expect(registry.check(subject, schema)).to eq(schema_data)
279
+ expect(upstream).to have_received(:check).exactly(0).times
280
+ expect(load_cache("data_by_schema.json")).to eq cache_before
281
+ end
282
+ end
283
+ end
284
+
225
285
  it_behaves_like "a confluent schema registry client" do
226
286
  let(:upstream) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
227
287
  let(:registry) { described_class.new(upstream) }
@@ -36,6 +36,24 @@ describe AvroTurf::Messaging do
36
36
  }
37
37
  AVSC
38
38
  end
39
+
40
+ let(:city_message) { { "name" => "Paris" } }
41
+ let(:city_schema_json) do
42
+ <<-AVSC
43
+ {
44
+ "name": "city",
45
+ "type": "record",
46
+ "fields": [
47
+ {
48
+ "type": "string",
49
+ "name": "name"
50
+ }
51
+ ]
52
+ }
53
+ AVSC
54
+ end
55
+
56
+ let(:city_schema) { Avro::Schema.parse(city_schema_json) }
39
57
  let(:schema) { Avro::Schema.parse(schema_json) }
40
58
 
41
59
  before do
@@ -49,6 +67,7 @@ describe AvroTurf::Messaging do
49
67
 
50
68
  before do
51
69
  define_schema "person.avsc", schema_json
70
+ define_schema "city.avsc", city_schema_json
52
71
  end
53
72
 
54
73
  shared_examples_for "encoding and decoding with the schema from schema store" do
@@ -92,6 +111,16 @@ describe AvroTurf::Messaging do
92
111
  expect { avro.encode(message, subject: 'missing', version: 1) }.to raise_error(AvroTurf::SchemaNotFoundError)
93
112
  end
94
113
 
114
+ it 'raises AvroTurf::SchemaNotFoundError when the schema does not exist on registry and register_schemas false' do
115
+ expect { avro.encode(city_message, schema_name: 'city', register_schemas: false) }.
116
+ to raise_error(AvroTurf::SchemaNotFoundError, "Schema with structure: #{city_schema} not found on registry")
117
+ end
118
+
119
+ it 'encodes with register_schemas false when the schema exists on the registry' do
120
+ data = avro.encode(message, schema_name: 'person', register_schemas: false)
121
+ expect(avro.decode(data, schema_name: 'person')).to eq message
122
+ end
123
+
95
124
  it 'caches parsed schemas for decoding' do
96
125
  data = avro.encode(message, subject: 'person', version: 1)
97
126
  avro.decode(data)
@@ -364,6 +393,34 @@ describe AvroTurf::Messaging do
364
393
  end
365
394
  end
366
395
 
396
+ context 'using fetch_schema_by_body' do
397
+ let(:subject_name) { 'city' }
398
+ let(:schema_name) { 'city' }
399
+ let(:namespace) { 'namespace' }
400
+ let(:city_schema_id) { 125 }
401
+ let(:city_schema_data) do
402
+ {
403
+ "subject" => subject_name,
404
+ "version" => 123,
405
+ "id" => city_schema_id,
406
+ "schema" => city_schema
407
+ }
408
+ end
409
+
410
+ subject(:fetch_schema_by_body) do
411
+ avro.fetch_schema_by_body(schema_name: schema_name, namespace: namespace, subject: subject_name)
412
+ end
413
+
414
+ before do
415
+ allow(schema_store).to receive(:find).with(schema_name, namespace).and_return(city_schema)
416
+ allow(registry).to receive(:check).with(subject_name, city_schema).and_return(city_schema_data)
417
+ end
418
+
419
+ it 'gets schema from registry' do
420
+ expect(fetch_schema_by_body).to eq([city_schema, city_schema_id])
421
+ end
422
+ end
423
+
367
424
  context 'using register_schema' do
368
425
  let(:schema_name) { 'schema_name' }
369
426
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro_turf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.0
4
+ version: 1.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-12 00:00:00.000000000 Z
11
+ date: 2024-08-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: avro
@@ -170,13 +170,14 @@ dependencies:
170
170
  - - ">="
171
171
  - !ruby/object:Gem::Version
172
172
  version: '0'
173
- description:
173
+ description:
174
174
  email:
175
175
  - dasch@zendesk.com
176
176
  executables: []
177
177
  extensions: []
178
178
  extra_rdoc_files: []
179
179
  files:
180
+ - ".github/workflows/push_gem.yml"
180
181
  - ".github/workflows/ruby.yml"
181
182
  - ".github/workflows/stale.yml"
182
183
  - ".gitignore"
@@ -265,8 +266,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
265
266
  - !ruby/object:Gem::Version
266
267
  version: '0'
267
268
  requirements: []
268
- rubygems_version: 3.4.10
269
- signing_key:
269
+ rubygems_version: 3.5.11
270
+ signing_key:
270
271
  specification_version: 4
271
272
  summary: A library that makes it easier to use the Avro serialization format from
272
273
  Ruby