avro_turf 0.11.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e2ee4d9598bcaa2ec5738a1130fae0b19be7b5e7250f27540313b7471f97e23
4
- data.tar.gz: 0b441cb30a153958c2ea283300a1a05c26218e2a97cb807fce36f8ad9d0240da
3
+ metadata.gz: 8fc2d29f1112e649cc2cd5ec96f2126a0ea5fa8f46ebee01d908cdceeac8da0d
4
+ data.tar.gz: e1989abd9d9b0e5db24f360e96e7bce7ce18e5f488cc794d25a2b3fae6102e66
5
5
  SHA512:
6
- metadata.gz: 12779eac5c325752cfa1be34da94ef5f332490cda4ff0aef29529a00557008cecf39592396a3f3525a2a12cb67a46744800781e0da69d4bf02511f5a2284e5e7
7
- data.tar.gz: a2e4c84fb338d62296aefb8ae8c206c262d756475ec11bd9cead17bfe6015ea069ff36a891df3381385fa650cb4b5ea1584855fccc5294f5910ab34563ad973a
6
+ metadata.gz: f562a66ba746d2c0e4bee8b8f3a0e7fcf65900f261eca444986c8ff6c68231e67aa9c4dc659213721d0d09c65fd9273e12e158938c0773baa48cf86598d6bef9
7
+ data.tar.gz: 53e3640900a3b64038fe063f5dadab2049417490be1833bb5f17124307abe628af64ea0c68b5bdc2bd4ef2f6e58bbfac287696eaf5ece9a35ddb7c2ab8e5127e
@@ -1,6 +1,6 @@
1
1
  name: Ruby
2
2
 
3
- on: [push]
3
+ on: [push, pull_request]
4
4
 
5
5
  jobs:
6
6
  build:
@@ -0,0 +1,19 @@
1
+ name: Mark stale issues and pull requests
2
+
3
+ on:
4
+ schedule:
5
+ - cron: "0 0 * * *"
6
+
7
+ jobs:
8
+ stale:
9
+
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - uses: actions/stale@v1
14
+ with:
15
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
16
+ stale-issue-message: 'Stale issue message'
17
+ stale-pr-message: 'Stale pull request message'
18
+ stale-issue-label: 'no-issue-activity'
19
+ stale-pr-label: 'no-pr-activity'
@@ -1,7 +1,20 @@
1
- # avro_turf
1
+ # AvroTurf
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## v1.2.0
6
+
7
+ - Expose `fetch_schema`, `fetch_schema_by_id` and `register_schema` methods in `Messaging` interface (#117, #119)
8
+ - Add ability to validate message before encoding in `Messaging#encode` interface (#116, #118)
9
+
10
+ ## v1.1.0
11
+
12
+ - Compatibility with Avro v1.10.x.
13
+
14
+ ## v1.0.0
15
+
16
+ - Stop caching nested sub-schemas (#111)
17
+
5
18
  ## v0.11.0
6
19
 
7
20
  - Add proxy support (#107)
data/README.md CHANGED
@@ -16,6 +16,48 @@ These classes have been renamed to `AvroTurf::ConfluentSchemaRegistry`,
16
16
 
17
17
  The aliases for the original names will be removed in a future release.
18
18
 
19
+ ## Note about finding nested schemas
20
+
21
+ As of AvroTurf version 1.0.0, only top-level schemas that have their own .avsc file will be loaded and resolvable by the `AvroTurf::SchemaStore#find` method. This change will likely not affect most users. However, if you use `AvroTurf::SchemaStore#load_schemas!` to pre-cache all your schemas and then rely on `AvroTurf::SchemaStore#find` to access nested schemas that are not defined by their own .avsc files, your code may stop working when you upgrade to v1.0.0.
22
+
23
+ As an example, if you have a `person` schema (defined in `my/schemas/contacts/person.avsc`) that defines a nested `address` schema like this:
24
+
25
+ ```json
26
+ {
27
+ "name": "person",
28
+ "namespace": "contacts",
29
+ "type": "record",
30
+ "fields": [
31
+ {
32
+ "name": "address",
33
+ "type": {
34
+ "name": "address",
35
+ "type": "record",
36
+ "fields": [
37
+ { "name": "addr1", "type": "string" },
38
+ { "name": "addr2", "type": "string" },
39
+ { "name": "city", "type": "string" },
40
+ { "name": "zip", "type": "string" }
41
+ ]
42
+ }
43
+ }
44
+ ]
45
+ }
46
+ ```
47
+ ...this will no longer work in v1.0.0:
48
+ ```ruby
49
+ store = AvroTurf::SchemaStore.new(path: 'my/schemas')
50
+ store.load_schemas!
51
+
52
+ # Accessing 'person' is correct and works fine.
53
+ person = store.find('person', 'contacts') # my/schemas/contacts/person.avsc exists
54
+
55
+ # Trying to access 'address' raises AvroTurf::SchemaNotFoundError
56
+ address = store.find('address', 'contacts') # my/schemas/contacts/address.avsc is not found
57
+ ```
58
+
59
+ For details and context, see [this pull request](https://github.com/dasch/avro_turf/pull/111).
60
+
19
61
  ## Installation
20
62
 
21
63
  Add this line to your application's Gemfile:
@@ -136,6 +178,10 @@ data = avro.encode({ "title" => "hello, world" }, subject: 'greeting', version:
136
178
  # of the same schema version will be served by the cache.
137
179
  data = avro.encode({ "title" => "hello, world" }, schema_id: 2)
138
180
 
181
+ # A message can be validated before encoding; an invalid message raises
182
+ # Avro::SchemaValidator::ValidationError with a description of the problem.
183
+ data = avro.encode({ "titl" => "hello, world" }, schema_name: "greeting", validate: true)
184
+
139
185
  # When decoding, the schema will be fetched from the registry and cached. Subsequent
140
186
  # instances of the same schema id will be served by the cache.
141
187
  avro.decode(data) #=> { "title" => "hello, world" }
@@ -147,6 +193,20 @@ result.message #=> { "title" => "hello, world" }
147
193
  result.schema_id #=> 3
148
194
  result.writer_schema #=> #<Avro::Schema: ...>
149
195
  result.reader_schema #=> nil
196
+
197
+ # You can also work with schema through this interface:
198
+ # Fetch latest schema for subject from registry
199
+ schema, schema_id = avro.fetch_schema(subject: 'greeting')
200
+ # Fetch specific version
201
+ schema, schema_id = avro.fetch_schema(subject: 'greeting', version: 1)
202
+ # Fetch schema by id
203
+ schema, schema_id = avro.fetch_schema_by_id(3)
204
+ # Register schema fetched from store by name
205
+ schema, schema_id = avro.register_schema(schema_name: 'greeting')
206
+ # Specify namespace (same as schema_name: 'somewhere.greeting')
207
+ schema, schema_id = avro.register_schema(schema_name: 'greeting', namespace: 'somewhere')
208
+ # Customize subject under which to register schema
209
+ schema, schema_id = avro.register_schema(schema_name: 'greeting', namespace: 'somewhere', subject: 'test')
150
210
  ```
151
211
 
152
212
  ### Confluent Schema Registry Client
@@ -17,8 +17,8 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
- spec.add_dependency "avro", ">= 1.7.7", "< 1.10"
21
- spec.add_dependency "excon", "~> 0.45"
20
+ spec.add_dependency "avro", ">= 1.7.7", "< 1.11"
21
+ spec.add_dependency "excon", "~> 0.71"
22
22
 
23
23
  spec.add_development_dependency "bundler", "~> 2.0"
24
24
  spec.add_development_dependency "rake", "~> 13.0"
@@ -84,19 +84,26 @@ class AvroTurf
84
84
  # the data. Must match the schema used when encoding (optional).
85
85
  # schema_id - The integer id of the schema that should be used to encode
86
86
  # the data.
87
+ # validate - Boolean; when true, the message is fully validated before
88
+ # encoding, and Avro::SchemaValidator::ValidationError with
89
+ # a descriptive message is raised if the message is invalid.
87
90
  #
88
91
  # Returns the encoded data as a String.
89
- def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil)
90
- schema_id, schema = if schema_id
92
+ def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil, validate: false)
93
+ schema, schema_id = if schema_id
91
94
  fetch_schema_by_id(schema_id)
92
95
  elsif subject && version
93
- fetch_schema(subject, version)
96
+ fetch_schema(subject: subject, version: version)
94
97
  elsif schema_name
95
- register_schema(subject, schema_name, namespace)
98
+ register_schema(subject: subject, schema_name: schema_name, namespace: namespace)
96
99
  else
97
100
  raise ArgumentError.new('Neither schema_name nor schema_id nor subject + version provided to determine the schema.')
98
101
  end
99
102
 
103
+ if validate
104
+ Avro::SchemaValidator.validate!(schema, message, recursive: true, encoded: false, fail_on_extra_fields: true)
105
+ end
106
+
100
107
  stream = StringIO.new
101
108
  writer = Avro::IO::DatumWriter.new(schema)
102
109
  encoder = Avro::IO::BinaryEncoder.new(stream)
@@ -169,31 +176,29 @@ class AvroTurf
169
176
  raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
170
177
  end
171
178
 
172
- private
173
-
174
179
  # Providing subject and version to determine the schema,
175
180
  # which skips the auto registration of schema on the schema registry.
176
181
  # Fetch the schema from registry with the provided subject name and version.
177
- def fetch_schema(subject, version)
182
+ def fetch_schema(subject:, version: 'latest')
178
183
  schema_data = @registry.subject_version(subject, version)
179
184
  schema_id = schema_data.fetch('id')
180
185
  schema = Avro::Schema.parse(schema_data.fetch('schema'))
181
- [schema_id, schema]
186
+ [schema, schema_id]
182
187
  end
183
188
 
184
189
  # Fetch the schema from registry with the provided schema_id.
185
190
  def fetch_schema_by_id(schema_id)
186
191
  schema_json = @registry.fetch(schema_id)
187
192
  schema = Avro::Schema.parse(schema_json)
188
- [schema_id, schema]
193
+ [schema, schema_id]
189
194
  end
190
195
 
191
196
  # Schemas are registered under the full name of the top level Avro record
192
197
  # type, or `subject` if it's provided.
193
- def register_schema(subject, schema_name, namespace)
198
+ def register_schema(schema_name:, subject: nil, namespace: nil)
194
199
  schema = @schema_store.find(schema_name, namespace)
195
200
  schema_id = @registry.register(subject || schema.fullname, schema)
196
- [schema_id, schema]
201
+ [schema, schema_id]
197
202
  end
198
203
  end
199
204
  end
@@ -46,26 +46,45 @@ class AvroTurf::SchemaStore
46
46
 
47
47
  # Loads single schema
48
48
  # This method is not thread-safe; do not call it outside of a mutex synchronization routine
49
- def load_schema!(fullname, namespace = nil)
49
+ def load_schema!(fullname, namespace = nil, local_schemas_cache = {})
50
50
  *namespace, schema_name = fullname.split(".")
51
51
  schema_path = File.join(@path, *namespace, schema_name + ".avsc")
52
52
  schema_json = JSON.parse(File.read(schema_path))
53
- schema = Avro::Schema.real_parse(schema_json, @schemas)
54
53
 
54
+ schema = Avro::Schema.real_parse(schema_json, local_schemas_cache)
55
+
56
+ # Don't cache the parsed schema until after its fullname is validated
55
57
  if schema.respond_to?(:fullname) && schema.fullname != fullname
56
58
  raise AvroTurf::SchemaError, "expected schema `#{schema_path}' to define type `#{fullname}'"
57
59
  end
58
60
 
61
+ # Cache only this new top-level schema by its fullname. It's critical
62
+ # not to make every sub-schema resolvable at the top level here because
63
+ # multiple different avsc files may define the same sub-schema, and
64
+ # if we share the @schemas cache across all parsing contexts, the Avro
65
+ # gem will raise an Avro::SchemaParseError when parsing another avsc
66
+ # file that contains a subschema with the same fullname as one
67
+ # encountered previously in a different file:
68
+ # <Avro::SchemaParseError: The name "foo.bar" is already in use.>
69
+ # Essentially, the only schemas that should be resolvable in @schemas
70
+ # are those that have their own .avsc files on disk.
71
+ @schemas[fullname] = schema
72
+
59
73
  schema
60
74
  rescue ::Avro::SchemaParseError => e
61
75
  # This is a hack in order to figure out exactly which type was missing. The
62
76
  # Avro gem ought to provide this data directly.
63
77
  if e.to_s =~ /"([\w\.]+)" is not a schema we know about/
64
- load_schema!($1)
78
+ # Try to first resolve a referenced schema from disk.
79
+ # If this is successful, the Avro gem will have mutated the
80
+ # local_schemas_cache, adding all the new schemas it found.
81
+ load_schema!($1, nil, local_schemas_cache)
65
82
 
66
- # Re-resolve the original schema now that the dependency has been resolved.
67
- @schemas.delete(fullname)
68
- load_schema!(fullname)
83
+ # Attempt to re-parse the original schema now that the dependency
84
+ # has been resolved and use the now-updated local_schemas_cache to
85
+ # pick up where we left off.
86
+ local_schemas_cache.delete(fullname)
87
+ load_schema!(fullname, nil, local_schemas_cache)
69
88
  else
70
89
  raise
71
90
  end
@@ -1,3 +1,3 @@
1
1
  class AvroTurf
2
- VERSION = "0.11.0"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -297,4 +297,106 @@ describe AvroTurf::Messaging do
297
297
  end
298
298
  end
299
299
  end
300
+
301
+ context "validating" do
302
+ subject(:encode){ avro.encode(message, schema_name: "person", validate: true) }
303
+
304
+ context "for correct message" do
305
+ it { expect { encode }.not_to raise_error }
306
+ end
307
+
308
+ context "when message has wrong type" do
309
+ let(:message) { { "full_name" => 123 } }
310
+
311
+ it { expect { encode }.to raise_error(Avro::SchemaValidator::ValidationError, /\.full_name expected type string, got int/) }
312
+ end
313
+
314
+ context "when message contains extra fields (typo in key)" do
315
+ let(:message) { { "fulll_name" => "John Doe" } }
316
+
317
+ it { expect { encode }.to raise_error(Avro::SchemaValidator::ValidationError, /extra field 'fulll_name'/) }
318
+ end
319
+ end
320
+
321
+ context 'fetching and registering schema' do
322
+ let(:schema_store) { AvroTurf::SchemaStore.new(path: "spec/schemas") }
323
+
324
+ let(:registry) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
325
+
326
+ let(:avro) do
327
+ AvroTurf::Messaging.new(
328
+ registry: registry,
329
+ schema_store: schema_store,
330
+ logger: logger
331
+ )
332
+ end
333
+
334
+ let(:schema_id) { 234 }
335
+
336
+ context 'using fetch_schema' do
337
+ subject { avro.fetch_schema(subject: subj, version: version) }
338
+
339
+ let(:subj) { 'subject' }
340
+
341
+ let(:version) { 'version' }
342
+
343
+ let(:response) { {'id' => schema_id, 'schema' => schema_json} }
344
+
345
+ before do
346
+ allow(registry).to receive(:subject_version).with(subj, version).and_return(response)
347
+ end
348
+
349
+ it 'gets schema from registry' do
350
+ expect(subject).to eq([schema, schema_id])
351
+ end
352
+ end
353
+
354
+ context 'using fetch_schema_by_id' do
355
+ subject { avro.fetch_schema_by_id(schema_id) }
356
+
357
+ before do
358
+ allow(registry).to receive(:fetch).with(schema_id).and_return(schema_json)
359
+ end
360
+
361
+ it 'gets schema from registry' do
362
+ expect(subject).to eq([schema, schema_id])
363
+ end
364
+ end
365
+
366
+ context 'using register_schema' do
367
+ let(:schema_name) { 'schema_name' }
368
+
369
+ let(:namespace) { 'namespace' }
370
+
371
+ before do
372
+ allow(schema_store).to receive(:find).with(schema_name, namespace).and_return(schema)
373
+ end
374
+
375
+ context 'when subject is not set' do
376
+ subject { avro.register_schema(schema_name: schema_name, namespace: namespace) }
377
+
378
+ before do
379
+ allow(registry).to receive(:register).with(schema.fullname, schema).and_return(schema_id)
380
+ end
381
+
382
+ it 'registers schema in registry' do
383
+ expect(subject).to eq([schema, schema_id])
384
+ end
385
+ end
386
+
387
+ context 'when subject is set' do
388
+ subject { avro.register_schema(schema_name: schema_name, namespace: namespace, subject: subj) }
389
+
390
+ let(:subj) { 'subject' }
391
+
392
+ before do
393
+ allow(registry).to receive(:register).with(subj, schema).and_return(schema_id)
394
+ end
395
+
396
+ it 'registers schema in registry' do
397
+ expect(subject).to eq([schema, schema_id])
398
+ end
399
+ end
400
+ end
401
+ end
300
402
  end
@@ -198,6 +198,104 @@ describe AvroTurf::SchemaStore do
198
198
  expect(schema.fullname).to eq "person"
199
199
  end
200
200
 
201
+ # This test would fail under avro_turf <= v0.11.0
202
+ it "does NOT cache *nested* schemas in memory" do
203
+ FileUtils.mkdir_p("spec/schemas/test")
204
+
205
+ define_schema "test/person.avsc", <<-AVSC
206
+ {
207
+ "name": "person",
208
+ "namespace": "test",
209
+ "type": "record",
210
+ "fields": [
211
+ {
212
+ "name": "address",
213
+ "type": {
214
+ "name": "address",
215
+ "type": "record",
216
+ "fields": [
217
+ { "name": "addr1", "type": "string" },
218
+ { "name": "addr2", "type": "string" },
219
+ { "name": "city", "type": "string" },
220
+ { "name": "zip", "type": "string" }
221
+ ]
222
+ }
223
+ }
224
+ ]
225
+ }
226
+ AVSC
227
+
228
+ schema = store.find('person', 'test')
229
+ expect(schema.fullname).to eq "test.person"
230
+
231
+ expect { store.find('address', 'test') }.
232
+ to raise_error(AvroTurf::SchemaNotFoundError)
233
+ end
234
+
235
+ # This test would fail under avro_turf <= v0.11.0
236
+ it "allows two different avsc files to define nested sub-schemas with the same fullname" do
237
+ FileUtils.mkdir_p("spec/schemas/test")
238
+
239
+ define_schema "test/person.avsc", <<-AVSC
240
+ {
241
+ "name": "person",
242
+ "namespace": "test",
243
+ "type": "record",
244
+ "fields": [
245
+ {
246
+ "name": "location",
247
+ "type": {
248
+ "name": "location",
249
+ "type": "record",
250
+ "fields": [
251
+ { "name": "city", "type": "string" },
252
+ { "name": "zipcode", "type": "string" }
253
+ ]
254
+ }
255
+ }
256
+ ]
257
+ }
258
+ AVSC
259
+
260
+ define_schema "test/company.avsc", <<-AVSC
261
+ {
262
+ "name": "company",
263
+ "namespace": "test",
264
+ "type": "record",
265
+ "fields": [
266
+ {
267
+ "name": "headquarters",
268
+ "type": {
269
+ "name": "location",
270
+ "type": "record",
271
+ "fields": [
272
+ { "name": "city", "type": "string" },
273
+ { "name": "postcode", "type": "string" }
274
+ ]
275
+ }
276
+ }
277
+ ]
278
+ }
279
+ AVSC
280
+
281
+ company = nil
282
+ person = store.find('person', 'test')
283
+
284
+ # This should *NOT* raise the error:
285
+ # #<Avro::SchemaParseError: The name "test.location" is already in use.>
286
+ expect { company = store.find('company', 'test') }.not_to raise_error
287
+
288
+ person_location_field = person.fields_hash['location']
289
+ expect(person_location_field.type.name).to eq('location')
290
+ expect(person_location_field.type.fields_hash).to include('zipcode')
291
+ expect(person_location_field.type.fields_hash).not_to include('postcode')
292
+
293
+ company_headquarters_field = company.fields_hash['headquarters']
294
+ expect(company_headquarters_field.type.name).to eq('location')
295
+ expect(company_headquarters_field.type.fields_hash).to include('postcode')
296
+ expect(company_headquarters_field.type.fields_hash).not_to include('zipcode')
297
+ end
298
+
201
299
  it "is thread safe" do
202
300
  define_schema "address.avsc", <<-AVSC
203
301
  {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro_turf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-10-25 00:00:00.000000000 Z
11
+ date: 2020-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: avro
@@ -19,7 +19,7 @@ dependencies:
19
19
  version: 1.7.7
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: '1.10'
22
+ version: '1.11'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,21 +29,21 @@ dependencies:
29
29
  version: 1.7.7
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: '1.10'
32
+ version: '1.11'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: excon
35
35
  requirement: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '0.45'
39
+ version: '0.71'
40
40
  type: :runtime
41
41
  prerelease: false
42
42
  version_requirements: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '0.45'
46
+ version: '0.71'
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: bundler
49
49
  requirement: !ruby/object:Gem::Requirement
@@ -165,6 +165,7 @@ extra_rdoc_files: []
165
165
  files:
166
166
  - ".circleci/config.yml"
167
167
  - ".github/workflows/ruby.yml"
168
+ - ".github/workflows/stale.yml"
168
169
  - ".gitignore"
169
170
  - ".rspec"
170
171
  - CHANGELOG.md