avro_turf 0.11.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e2ee4d9598bcaa2ec5738a1130fae0b19be7b5e7250f27540313b7471f97e23
4
- data.tar.gz: 0b441cb30a153958c2ea283300a1a05c26218e2a97cb807fce36f8ad9d0240da
3
+ metadata.gz: a4e9638703b961c348d06adcfa3e34ac2bd908e68f9cf7762a550ae4ef453b8a
4
+ data.tar.gz: 6d53b1f9b5e4b9e5a3aaeb13207bf350e2b56c8db138cb499643180a94a684b7
5
5
  SHA512:
6
- metadata.gz: 12779eac5c325752cfa1be34da94ef5f332490cda4ff0aef29529a00557008cecf39592396a3f3525a2a12cb67a46744800781e0da69d4bf02511f5a2284e5e7
7
- data.tar.gz: a2e4c84fb338d62296aefb8ae8c206c262d756475ec11bd9cead17bfe6015ea069ff36a891df3381385fa650cb4b5ea1584855fccc5294f5910ab34563ad973a
6
+ metadata.gz: 7045f852f20d3ddeca3256724c918ed255722a1556f662d159b805921670d0a2af3e574e3c967bee7622d5d3e05f5348d6023b250006c3651cb7b1f9cec059bf
7
+ data.tar.gz: 50ca19fb058246ba19b28472e03bedcfbc09ddcd336d46deb29dec4b406d81a65c2b0f6ca84ec36d5227da035906976af133cf757cbfc2f20abda116c8e17d8c
@@ -1,11 +1,15 @@
1
1
  name: Ruby
2
2
 
3
- on: [push]
3
+ on: [push, pull_request]
4
4
 
5
5
  jobs:
6
6
  build:
7
7
 
8
8
  runs-on: ubuntu-latest
9
+ strategy:
10
+ matrix:
11
+ ruby-version: [1.8.x, 1.9.x, 2.0.x, 2.1.x, 2.2.x,
12
+ 2.3.x, 2.4.x, 2.5.x, 2.6.x, 2.7.x, 3.0.x]
9
13
 
10
14
  steps:
11
15
  - uses: actions/checkout@v1
@@ -0,0 +1,19 @@
1
+ name: Mark stale issues and pull requests
2
+
3
+ on:
4
+ schedule:
5
+ - cron: "0 0 * * *"
6
+
7
+ jobs:
8
+ stale:
9
+
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - uses: actions/stale@v1
14
+ with:
15
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
16
+ stale-issue-message: 'Stale issue message'
17
+ stale-pr-message: 'Stale pull request message'
18
+ stale-issue-label: 'no-issue-activity'
19
+ stale-pr-label: 'no-pr-activity'
data/CHANGELOG.md CHANGED
@@ -1,7 +1,29 @@
1
- # avro_turf
1
+ # AvroTurf
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## v1.3.1
6
+
7
+ - Prevent CachedConfluentSchemaRegistry from caching the 'latest' version (#140)
8
+ - Fix issue with zero length schema cache file (#138)
9
+
10
+ ## v1.3.0
11
+
12
+ - Add support for plain user/password auth to ConfluentSchemaRegistry (#120)
13
+
14
+ ## v1.2.0
15
+
16
+ - Expose `fetch_schema`, `fetch_schema_by_id` and `register_schema` in `Messaging` interface (#117, #119)
17
+ - Add ability to validate message before encoding in `Messaging#encode` interface (#116, #118)
18
+
19
+ ## v1.1.0
20
+
21
+ - Compatibility with Avro v1.10.x.
22
+
23
+ ## v1.0.0
24
+
25
+ - Stop caching nested sub-schemas (#111)
26
+
5
27
  ## v0.11.0
6
28
 
7
29
  - Add proxy support (#107)
data/README.md CHANGED
@@ -16,6 +16,48 @@ These classes have been renamed to `AvroTurf::ConfluentSchemaRegistry`,
16
16
 
17
17
  The aliases for the original names will be removed in a future release.
18
18
 
19
+ ## Note about finding nested schemas
20
+
21
+ As of AvroTurf version 0.12.0, only top-level schemas that have their own .avsc file will be loaded and resolvable by the `AvroTurf::SchemaStore#find` method. This change will likely not affect most users. However, if you use `AvroTurf::SchemaStore#load_schemas!` to pre-cache all your schemas and then rely on `AvroTurf::SchemaStore#find` to access nested schemas that are not defined by their own .avsc files, your code may stop working when you upgrade to v0.12.0.
22
+
23
+ As an example, if you have a `person` schema (defined in `my/schemas/contacts/person.avsc`) that defines a nested `address` schema like this:
24
+
25
+ ```json
26
+ {
27
+ "name": "person",
28
+ "namespace": "contacts",
29
+ "type": "record",
30
+ "fields": [
31
+ {
32
+ "name": "address",
33
+ "type": {
34
+ "name": "address",
35
+ "type": "record",
36
+ "fields": [
37
+ { "name": "addr1", "type": "string" },
38
+ { "name": "addr2", "type": "string" },
39
+ { "name": "city", "type": "string" },
40
+ { "name": "zip", "type": "string" }
41
+ ]
42
+ }
43
+ }
44
+ ]
45
+ }
46
+ ```
47
+ ...this will no longer work in v0.12.0:
48
+ ```ruby
49
+ store = AvroTurf::SchemaStore.new(path: 'my/schemas')
50
+ store.load_schemas!
51
+
52
+ # Accessing 'person' is correct and works fine.
53
+ person = store.find('person', 'contacts') # my/schemas/contacts/person.avsc exists
54
+
55
+ # Trying to access 'address' raises AvroTurf::SchemaNotFoundError
56
+ address = store.find('address', 'contacts') # my/schemas/contacts/address.avsc is not found
57
+ ```
58
+
59
+ For details and context, see [this pull request](https://github.com/dasch/avro_turf/pull/111).
60
+
19
61
  ## Installation
20
62
 
21
63
  Add this line to your application's Gemfile:
@@ -136,6 +178,10 @@ data = avro.encode({ "title" => "hello, world" }, subject: 'greeting', version:
136
178
  # of the same schema version will be served by the cache.
137
179
  data = avro.encode({ "title" => "hello, world" }, schema_id: 2)
138
180
 
181
+ # A message can be validated before encoding to get a description of the problem through
182
+ # an Avro::SchemaValidator::ValidationError exception
183
+ data = avro.encode({ "titl" => "hello, world" }, schema_name: "greeting", validate: true)
184
+
139
185
  # When decoding, the schema will be fetched from the registry and cached. Subsequent
140
186
  # instances of the same schema id will be served by the cache.
141
187
  avro.decode(data) #=> { "title" => "hello, world" }
@@ -147,6 +193,20 @@ result.message #=> { "title" => "hello, world" }
147
193
  result.schema_id #=> 3
148
194
  result.writer_schema #=> #<Avro::Schema: ...>
149
195
  result.reader_schema #=> nil
196
+
197
+ # You can also work with schemas through this interface:
198
+ # Fetch latest schema for subject from registry
199
+ schema, schema_id = avro.fetch_schema(subject: 'greeting')
200
+ # Fetch specific version
201
+ schema, schema_id = avro.fetch_schema(subject: 'greeting', version: 1)
202
+ # Fetch schema by id
203
+ schema, schema_id = avro.fetch_schema_by_id(3)
204
+ # Register schema fetched from store by name
205
+ schema, schema_id = avro.register_schema(schema_name: 'greeting')
206
+ # Specify namespace (same as schema_name: 'somewhere.greeting')
207
+ schema, schema_id = avro.register_schema(schema_name: 'greeting', namespace: 'somewhere')
208
+ # Customize subject under which to register schema
209
+ schema, schema_id = avro.register_schema(schema_name: 'greeting', namespace: 'somewhere', subject: 'test')
150
210
  ```
151
211
 
152
212
  ### Confluent Schema Registry Client
data/avro_turf.gemspec CHANGED
@@ -17,8 +17,8 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
- spec.add_dependency "avro", ">= 1.7.7", "< 1.10"
21
- spec.add_dependency "excon", "~> 0.45"
20
+ spec.add_dependency "avro", ">= 1.7.7", "< 1.11"
21
+ spec.add_dependency "excon", "~> 0.71"
22
22
 
23
23
  spec.add_development_dependency "bundler", "~> 2.0"
24
24
  spec.add_development_dependency "rake", "~> 13.0"
@@ -33,6 +33,8 @@ class AvroTurf::CachedConfluentSchemaRegistry
33
33
  end
34
34
 
35
35
  def subject_version(subject, version = 'latest')
36
+ return @upstream.subject_version(subject, version) if version == 'latest'
37
+
36
38
  @cache.lookup_by_version(subject, version) ||
37
39
  @cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
38
40
  end
@@ -7,6 +7,8 @@ class AvroTurf::ConfluentSchemaRegistry
7
7
  url,
8
8
  logger: Logger.new($stdout),
9
9
  proxy: nil,
10
+ user: nil,
11
+ password: nil,
10
12
  client_cert: nil,
11
13
  client_key: nil,
12
14
  client_key_pass: nil,
@@ -17,10 +19,12 @@ class AvroTurf::ConfluentSchemaRegistry
17
19
  headers = {
18
20
  "Content-Type" => CONTENT_TYPE
19
21
  }
20
- headers[:proxy] = proxy if proxy&.present?
22
+ headers[:proxy] = proxy unless proxy.nil?
21
23
  @connection = Excon.new(
22
24
  url,
23
25
  headers: headers,
26
+ user: user,
27
+ password: password,
24
28
  client_cert: client_cert,
25
29
  client_key: client_key,
26
30
  client_key_pass: client_key_pass,
@@ -2,15 +2,19 @@
2
2
  # Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
3
3
  class AvroTurf::DiskCache < AvroTurf::InMemoryCache
4
4
 
5
- def initialize(disk_path)
5
+ def initialize(disk_path, logger: Logger.new($stdout))
6
6
  super()
7
7
 
8
+ @logger = logger
9
+
8
10
  # load the write-thru cache on startup, if it exists
9
11
  @schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
10
- @schemas_by_id = JSON.parse(File.read(@schemas_by_id_path)) if File.exist?(@schemas_by_id_path)
12
+ hash = read_from_disk_cache(@schemas_by_id_path)
13
+ @schemas_by_id = hash if hash
11
14
 
12
15
  @ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
13
- @ids_by_schema = JSON.parse(File.read(@ids_by_schema_path)) if File.exist?(@ids_by_schema_path)
16
+ hash = read_from_disk_cache(@ids_by_schema_path)
17
+ @ids_by_schema = hash if hash
14
18
 
15
19
  @schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
16
20
  @schemas_by_subject_version = {}
@@ -31,12 +35,18 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
31
35
  return value
32
36
  end
33
37
 
34
- # override to include write-thru cache after storing result from upstream
38
+ # override to use a json serializable cache key
39
+ def lookup_by_schema(subject, schema)
40
+ key = "#{subject}#{schema}"
41
+ @ids_by_schema[key]
42
+ end
43
+
44
+ # override to use a json serializable cache key and update the file cache
35
45
  def store_by_schema(subject, schema, id)
36
- # must return the value from storing the result (i.e. do not return result from file write)
37
- value = super
46
+ key = "#{subject}#{schema}"
47
+ @ids_by_schema[key] = id
38
48
  File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
39
- return value
49
+ id
40
50
  end
41
51
 
42
52
  # checks instance var (in-memory cache) for schema
@@ -49,7 +59,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
49
59
 
50
60
  return schema unless schema.nil?
51
61
 
52
- hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
62
+ hash = read_from_disk_cache(@schemas_by_subject_version_path)
53
63
  if hash
54
64
  @schemas_by_subject_version = hash
55
65
  @schemas_by_subject_version[key]
@@ -63,7 +73,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
63
73
  # update instance var (in memory-cache) to match
64
74
  def store_by_version(subject, version, schema)
65
75
  key = "#{subject}#{version}"
66
- hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
76
+ hash = read_from_disk_cache(@schemas_by_subject_version_path)
67
77
  hash = if hash
68
78
  hash[key] = schema
69
79
  hash
@@ -77,6 +87,19 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
77
87
  @schemas_by_subject_version[key]
78
88
  end
79
89
 
90
+ # Parse the file from disk, if it exists and is not zero length
91
+ private def read_from_disk_cache(path)
92
+ if File.exist?(path)
93
+ if File.size(path)!=0
94
+ return JSON.parse(File.read(path))
95
+ else
96
+ # just log a message if skipping zero length file
97
+ @logger.warn "skipping JSON.parse of zero length file at #{path}"
98
+ end
99
+ end
100
+ return nil
101
+ end
102
+
80
103
  private def write_to_disk_cache(path, hash)
81
104
  File.write(path, JSON.pretty_generate(hash))
82
105
  end
@@ -17,12 +17,12 @@ class AvroTurf::InMemoryCache
17
17
  end
18
18
 
19
19
  def lookup_by_schema(subject, schema)
20
- key = subject + schema.to_s
20
+ key = [subject, schema.to_s]
21
21
  @ids_by_schema[key]
22
22
  end
23
23
 
24
24
  def store_by_schema(subject, schema, id)
25
- key = subject + schema.to_s
25
+ key = [subject, schema.to_s]
26
26
  @ids_by_schema[key] = id
27
27
  end
28
28
 
@@ -34,6 +34,8 @@ class AvroTurf
34
34
  # namespace - The String default schema namespace.
35
35
  # logger - The Logger that should be used to log information (optional).
36
36
  # proxy - Forward the request via proxy (optional).
37
+ # user - User for basic auth (optional).
38
+ # password - Password for basic auth (optional).
37
39
  # client_cert - Name of file containing client certificate (optional).
38
40
  # client_key - Name of file containing client private key to go with client_cert (optional).
39
41
  # client_key_pass - Password to go with client_key (optional).
@@ -47,6 +49,8 @@ class AvroTurf
47
49
  namespace: nil,
48
50
  logger: nil,
49
51
  proxy: nil,
52
+ user: nil,
53
+ password: nil,
50
54
  client_cert: nil,
51
55
  client_key: nil,
52
56
  client_key_pass: nil,
@@ -61,6 +65,8 @@ class AvroTurf
61
65
  registry_url,
62
66
  logger: @logger,
63
67
  proxy: proxy,
68
+ user: user,
69
+ password: password,
64
70
  client_cert: client_cert,
65
71
  client_key: client_key,
66
72
  client_key_pass: client_key_pass,
@@ -84,19 +90,26 @@ class AvroTurf
84
90
  # the data. Must match the schema used when encoding (optional).
85
91
  # schema_id - The integer id of the schema that should be used to encode
86
92
  # the data.
93
+ # validate - Whether to perform complete message validation before
94
+ # encoding it. Avro::SchemaValidator::ValidationError with
95
+ # a descriptive message will be raised if the message is invalid.
87
96
  #
88
97
  # Returns the encoded data as a String.
89
- def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil)
90
- schema_id, schema = if schema_id
98
+ def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil, validate: false)
99
+ schema, schema_id = if schema_id
91
100
  fetch_schema_by_id(schema_id)
92
101
  elsif subject && version
93
- fetch_schema(subject, version)
102
+ fetch_schema(subject: subject, version: version)
94
103
  elsif schema_name
95
- register_schema(subject, schema_name, namespace)
104
+ register_schema(subject: subject, schema_name: schema_name, namespace: namespace)
96
105
  else
97
106
  raise ArgumentError.new('Neither schema_name nor schema_id nor subject + version provided to determine the schema.')
98
107
  end
99
108
 
109
+ if validate
110
+ Avro::SchemaValidator.validate!(schema, message, recursive: true, encoded: false, fail_on_extra_fields: true)
111
+ end
112
+
100
113
  stream = StringIO.new
101
114
  writer = Avro::IO::DatumWriter.new(schema)
102
115
  encoder = Avro::IO::BinaryEncoder.new(stream)
@@ -111,7 +124,7 @@ class AvroTurf
111
124
  writer.write(message, encoder)
112
125
 
113
126
  stream.string
114
- rescue Excon::Error::NotFound
127
+ rescue Excon::Errors::NotFound
115
128
  if schema_id
116
129
  raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
117
130
  else
@@ -169,31 +182,31 @@ class AvroTurf
169
182
  raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
170
183
  end
171
184
 
172
- private
173
-
174
185
  # Providing subject and version to determine the schema,
175
186
  # which skips the auto registration of the schema on the schema registry.
176
187
  # Fetch the schema from registry with the provided subject name and version.
177
- def fetch_schema(subject, version)
188
+ def fetch_schema(subject:, version: 'latest')
178
189
  schema_data = @registry.subject_version(subject, version)
179
190
  schema_id = schema_data.fetch('id')
180
191
  schema = Avro::Schema.parse(schema_data.fetch('schema'))
181
- [schema_id, schema]
192
+ [schema, schema_id]
182
193
  end
183
194
 
184
195
  # Fetch the schema from registry with the provided schema_id.
185
196
  def fetch_schema_by_id(schema_id)
186
- schema_json = @registry.fetch(schema_id)
187
- schema = Avro::Schema.parse(schema_json)
188
- [schema_id, schema]
197
+ schema = @schemas_by_id.fetch(schema_id) do
198
+ schema_json = @registry.fetch(schema_id)
199
+ Avro::Schema.parse(schema_json)
200
+ end
201
+ [schema, schema_id]
189
202
  end
190
203
 
191
204
  # Schemas are registered under the full name of the top level Avro record
192
205
  # type, or `subject` if it's provided.
193
- def register_schema(subject, schema_name, namespace)
206
+ def register_schema(schema_name:, subject: nil, namespace: nil)
194
207
  schema = @schema_store.find(schema_name, namespace)
195
208
  schema_id = @registry.register(subject || schema.fullname, schema)
196
- [schema_id, schema]
209
+ [schema, schema_id]
197
210
  end
198
211
  end
199
212
  end
@@ -22,7 +22,7 @@ class AvroTurf::SchemaStore
22
22
  # Still need to check whether the schema is already loaded
23
23
  return @schemas[fullname] if @schemas.key?(fullname)
24
24
 
25
- load_schema!(fullname, namespace)
25
+ load_schema!(fullname)
26
26
  end
27
27
  end
28
28
 
@@ -42,34 +42,57 @@ class AvroTurf::SchemaStore
42
42
  end
43
43
  end
44
44
 
45
- private
45
+ protected
46
46
 
47
47
  # Loads single schema
48
48
  # This method is not thread-safe; do not call it outside of a mutex synchronization routine
49
- def load_schema!(fullname, namespace = nil)
50
- *namespace, schema_name = fullname.split(".")
51
- schema_path = File.join(@path, *namespace, schema_name + ".avsc")
49
+ def load_schema!(fullname, local_schemas_cache = {})
50
+ schema_path = build_schema_path(fullname)
52
51
  schema_json = JSON.parse(File.read(schema_path))
53
- schema = Avro::Schema.real_parse(schema_json, @schemas)
54
52
 
53
+ schema = Avro::Schema.real_parse(schema_json, local_schemas_cache)
54
+
55
+ # Don't cache the parsed schema until after its fullname is validated
55
56
  if schema.respond_to?(:fullname) && schema.fullname != fullname
56
57
  raise AvroTurf::SchemaError, "expected schema `#{schema_path}' to define type `#{fullname}'"
57
58
  end
58
59
 
60
+ # Cache only this new top-level schema by its fullname. It's critical
61
+ # not to make every sub-schema resolvable at the top level here because
62
+ # multiple different avsc files may define the same sub-schema, and
63
+ # if we share the @schemas cache across all parsing contexts, the Avro
64
+ # gem will raise an Avro::SchemaParseError when parsing another avsc
65
+ # file that contains a subschema with the same fullname as one
66
+ # encountered previously in a different file:
67
+ # <Avro::SchemaParseError: The name "foo.bar" is already in use.>
68
+ # Essentially, the only schemas that should be resolvable in @schemas
69
+ # are those that have their own .avsc files on disk.
70
+ @schemas[fullname] = schema
71
+
59
72
  schema
60
73
  rescue ::Avro::SchemaParseError => e
61
74
  # This is a hack in order to figure out exactly which type was missing. The
62
75
  # Avro gem ought to provide this data directly.
63
76
  if e.to_s =~ /"([\w\.]+)" is not a schema we know about/
64
- load_schema!($1)
77
+ # Try to first resolve a referenced schema from disk.
78
+ # If this is successful, the Avro gem will have mutated the
79
+ # local_schemas_cache, adding all the new schemas it found.
80
+ load_schema!($1, local_schemas_cache)
65
81
 
66
- # Re-resolve the original schema now that the dependency has been resolved.
67
- @schemas.delete(fullname)
68
- load_schema!(fullname)
82
+ # Attempt to re-parse the original schema now that the dependency
83
+ # has been resolved and use the now-updated local_schemas_cache to
84
+ # pick up where we left off.
85
+ local_schemas_cache.delete(fullname)
86
+ load_schema!(fullname, local_schemas_cache)
69
87
  else
70
88
  raise
71
89
  end
72
90
  rescue Errno::ENOENT, Errno::ENAMETOOLONG
73
91
  raise AvroTurf::SchemaNotFoundError, "could not find Avro schema at `#{schema_path}'"
74
92
  end
93
+
94
+ def build_schema_path(fullname)
95
+ *namespace, schema_name = fullname.split(".")
96
+ schema_path = File.join(@path, *namespace, schema_name + ".avsc")
97
+ end
75
98
  end
@@ -1,3 +1,3 @@
1
1
  class AvroTurf
2
- VERSION = "0.11.0"
2
+ VERSION = "1.3.1"
3
3
  end
@@ -3,6 +3,8 @@ require 'avro_turf/confluent_schema_registry'
3
3
  require 'avro_turf/test/fake_confluent_schema_registry_server'
4
4
 
5
5
  describe AvroTurf::ConfluentSchemaRegistry do
6
+ let(:user) { "abc" }
7
+ let(:password) { "xxyyzz" }
6
8
  let(:client_cert) { "test client cert" }
7
9
  let(:client_key) { "test client key" }
8
10
  let(:client_key_pass) { "test client key password" }
@@ -18,4 +20,14 @@ describe AvroTurf::ConfluentSchemaRegistry do
18
20
  )
19
21
  }
20
22
  end
23
+
24
+ it_behaves_like "a confluent schema registry client" do
25
+ let(:registry) {
26
+ described_class.new(
27
+ registry_url,
28
+ user: user,
29
+ password: password,
30
+ )
31
+ }
32
+ end
21
33
  end
@@ -4,7 +4,8 @@ require 'avro_turf/test/fake_confluent_schema_registry_server'
4
4
 
5
5
  describe AvroTurf::CachedConfluentSchemaRegistry do
6
6
  let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
7
- let(:cache) { AvroTurf::DiskCache.new("spec/cache")}
7
+ let(:logger_io) { StringIO.new }
8
+ let(:cache) { AvroTurf::DiskCache.new("spec/cache", logger: Logger.new(logger_io))}
8
9
  let(:registry) { described_class.new(upstream, cache: cache) }
9
10
  let(:id) { rand(999) }
10
11
  let(:schema) do
@@ -80,6 +81,40 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
80
81
  end
81
82
  end
82
83
 
84
+ describe "#fetch (zero length cache file)" do
85
+ let(:cache_after) do
86
+ {
87
+ "#{id}" => "#{schema}"
88
+ }
89
+ end
90
+
91
+ before do
92
+ # setup the disk cache with a zero length file
93
+ File.write(File.join("spec/cache", "schemas_by_id.json"), '')
94
+ end
95
+
96
+ it "skips zero length disk cache" do
97
+ # multiple calls return same result, with only one upstream call
98
+ allow(upstream).to receive(:fetch).with(id).and_return(schema)
99
+ expect(registry.fetch(id)).to eq(schema)
100
+ expect(registry.fetch(id)).to eq(schema)
101
+ expect(upstream).to have_received(:fetch).exactly(1).times
102
+ expect(load_cache("schemas_by_id.json")).to eq cache_after
103
+ expect(logger_io.string).to include("zero length file at spec/cache/schemas_by_id.json")
104
+ end
105
+ end
106
+
107
+ describe "#fetch (corrupt cache file)" do
108
+ before do
109
+ # setup the disk cache with a corrupt file (i.e. not json)
110
+ File.write(File.join("spec/cache", "schemas_by_id.json"), 'NOTJSON')
111
+ end
112
+
113
+ it "raises error on corrupt cache file" do
114
+ expect{registry.fetch(id)}.to raise_error(JSON::ParserError, /unexpected token/)
115
+ end
116
+ end
117
+
83
118
  describe "#register" do
84
119
  let(:subject_name) { "a_subject" }
85
120
  let(:cache_before) do
@@ -120,6 +155,41 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
120
155
  end
121
156
  end
122
157
 
158
+ describe "#register (zero length cache file)" do
159
+ let(:subject_name) { "a_subject" }
160
+ let(:cache_after) do
161
+ {
162
+ "#{subject_name}#{schema}" => id
163
+ }
164
+ end
165
+
166
+ before do
167
+ # setup the disk cache with a zero length file
168
+ File.write(File.join("spec/cache", "ids_by_schema.json"), '')
169
+ end
170
+
171
+ it "skips zero length disk cache" do
172
+ # multiple calls return same result, with only one upstream call
173
+ allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
174
+ expect(registry.register(subject_name, schema)).to eq(id)
175
+ expect(registry.register(subject_name, schema)).to eq(id)
176
+ expect(upstream).to have_received(:register).exactly(1).times
177
+ expect(load_cache("ids_by_schema.json")).to eq cache_after
178
+ expect(logger_io.string).to include("zero length file at spec/cache/ids_by_schema.json")
179
+ end
180
+ end
181
+
182
+ describe "#register (corrupt cache file)" do
183
+ before do
184
+ # setup the disk cache with a corrupt file (i.e. not json)
185
+ File.write(File.join("spec/cache", "ids_by_schema.json"), 'NOTJSON')
186
+ end
187
+
188
+ it "raises error on corrupt cache file" do
189
+ expect{registry.register(subject_name, schema)}.to raise_error(JSON::ParserError, /unexpected token/)
190
+ end
191
+ end
192
+
123
193
  describe "#subject_version" do
124
194
  it "writes thru to disk cache" do
125
195
  # multiple calls return same result, with zero upstream calls
@@ -297,4 +297,106 @@ describe AvroTurf::Messaging do
297
297
  end
298
298
  end
299
299
  end
300
+
301
+ context "validating" do
302
+ subject(:encode){ avro.encode(message, schema_name: "person", validate: true) }
303
+
304
+ context "for correct message" do
305
+ it { expect { encode }.not_to raise_error }
306
+ end
307
+
308
+ context "when message has wrong type" do
309
+ let(:message) { { "full_name" => 123 } }
310
+
311
+ it { expect { encode }.to raise_error(Avro::SchemaValidator::ValidationError, /\.full_name expected type string, got int/) }
312
+ end
313
+
314
+ context "when message contains extra fields (typo in key)" do
315
+ let(:message) { { "fulll_name" => "John Doe" } }
316
+
317
+ it { expect { encode }.to raise_error(Avro::SchemaValidator::ValidationError, /extra field 'fulll_name'/) }
318
+ end
319
+ end
320
+
321
+ context 'fetching and registering schema' do
322
+ let(:schema_store) { AvroTurf::SchemaStore.new(path: "spec/schemas") }
323
+
324
+ let(:registry) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
325
+
326
+ let(:avro) do
327
+ AvroTurf::Messaging.new(
328
+ registry: registry,
329
+ schema_store: schema_store,
330
+ logger: logger
331
+ )
332
+ end
333
+
334
+ let(:schema_id) { 234 }
335
+
336
+ context 'using fetch_schema' do
337
+ subject { avro.fetch_schema(subject: subj, version: version) }
338
+
339
+ let(:subj) { 'subject' }
340
+
341
+ let(:version) { 'version' }
342
+
343
+ let(:response) { {'id' => schema_id, 'schema' => schema_json} }
344
+
345
+ before do
346
+ allow(registry).to receive(:subject_version).with(subj, version).and_return(response)
347
+ end
348
+
349
+ it 'gets schema from registry' do
350
+ expect(subject).to eq([schema, schema_id])
351
+ end
352
+ end
353
+
354
+ context 'using fetch_schema_by_id' do
355
+ subject { avro.fetch_schema_by_id(schema_id) }
356
+
357
+ before do
358
+ allow(registry).to receive(:fetch).with(schema_id).and_return(schema_json)
359
+ end
360
+
361
+ it 'gets schema from registry' do
362
+ expect(subject).to eq([schema, schema_id])
363
+ end
364
+ end
365
+
366
+ context 'using register_schema' do
367
+ let(:schema_name) { 'schema_name' }
368
+
369
+ let(:namespace) { 'namespace' }
370
+
371
+ before do
372
+ allow(schema_store).to receive(:find).with(schema_name, namespace).and_return(schema)
373
+ end
374
+
375
+ context 'when subject is not set' do
376
+ subject { avro.register_schema(schema_name: schema_name, namespace: namespace) }
377
+
378
+ before do
379
+ allow(registry).to receive(:register).with(schema.fullname, schema).and_return(schema_id)
380
+ end
381
+
382
+ it 'registers schema in registry' do
383
+ expect(subject).to eq([schema, schema_id])
384
+ end
385
+ end
386
+
387
+ context 'when subject is set' do
388
+ subject { avro.register_schema(schema_name: schema_name, namespace: namespace, subject: subj) }
389
+
390
+ let(:subj) { 'subject' }
391
+
392
+ before do
393
+ allow(registry).to receive(:register).with(subj, schema).and_return(schema_id)
394
+ end
395
+
396
+ it 'registers schema in registry' do
397
+ expect(subject).to eq([schema, schema_id])
398
+ end
399
+ end
400
+ end
401
+ end
300
402
  end
@@ -198,6 +198,104 @@ describe AvroTurf::SchemaStore do
198
198
  expect(schema.fullname).to eq "person"
199
199
  end
200
200
 
201
+ # This test would fail under avro_turf <= v0.11.0
202
+ it "does NOT cache *nested* schemas in memory" do
203
+ FileUtils.mkdir_p("spec/schemas/test")
204
+
205
+ define_schema "test/person.avsc", <<-AVSC
206
+ {
207
+ "name": "person",
208
+ "namespace": "test",
209
+ "type": "record",
210
+ "fields": [
211
+ {
212
+ "name": "address",
213
+ "type": {
214
+ "name": "address",
215
+ "type": "record",
216
+ "fields": [
217
+ { "name": "addr1", "type": "string" },
218
+ { "name": "addr2", "type": "string" },
219
+ { "name": "city", "type": "string" },
220
+ { "name": "zip", "type": "string" }
221
+ ]
222
+ }
223
+ }
224
+ ]
225
+ }
226
+ AVSC
227
+
228
+ schema = store.find('person', 'test')
229
+ expect(schema.fullname).to eq "test.person"
230
+
231
+ expect { store.find('address', 'test') }.
232
+ to raise_error(AvroTurf::SchemaNotFoundError)
233
+ end
234
+
235
+ # This test would fail under avro_turf <= v0.11.0
236
+ it "allows two different avsc files to define nested sub-schemas with the same fullname" do
237
+ FileUtils.mkdir_p("spec/schemas/test")
238
+
239
+ define_schema "test/person.avsc", <<-AVSC
240
+ {
241
+ "name": "person",
242
+ "namespace": "test",
243
+ "type": "record",
244
+ "fields": [
245
+ {
246
+ "name": "location",
247
+ "type": {
248
+ "name": "location",
249
+ "type": "record",
250
+ "fields": [
251
+ { "name": "city", "type": "string" },
252
+ { "name": "zipcode", "type": "string" }
253
+ ]
254
+ }
255
+ }
256
+ ]
257
+ }
258
+ AVSC
259
+
260
+ define_schema "test/company.avsc", <<-AVSC
261
+ {
262
+ "name": "company",
263
+ "namespace": "test",
264
+ "type": "record",
265
+ "fields": [
266
+ {
267
+ "name": "headquarters",
268
+ "type": {
269
+ "name": "location",
270
+ "type": "record",
271
+ "fields": [
272
+ { "name": "city", "type": "string" },
273
+ { "name": "postcode", "type": "string" }
274
+ ]
275
+ }
276
+ }
277
+ ]
278
+ }
279
+ AVSC
280
+
281
+ company = nil
282
+ person = store.find('person', 'test')
283
+
284
+ # This should *NOT* raise the error:
285
+ # #<Avro::SchemaParseError: The name "test.location" is already in use.>
286
+ expect { company = store.find('company', 'test') }.not_to raise_error
287
+
288
+ person_location_field = person.fields_hash['location']
289
+ expect(person_location_field.type.name).to eq('location')
290
+ expect(person_location_field.type.fields_hash).to include('zipcode')
291
+ expect(person_location_field.type.fields_hash).not_to include('postcode')
292
+
293
+ company_headquarters_field = company.fields_hash['headquarters']
294
+ expect(company_headquarters_field.type.name).to eq('location')
295
+ expect(company_headquarters_field.type.fields_hash).to include('postcode')
296
+ expect(company_headquarters_field.type.fields_hash).not_to include('zipcode')
297
+ end
298
+
201
299
  it "is thread safe" do
202
300
  define_schema "address.avsc", <<-AVSC
203
301
  {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro_turf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-10-25 00:00:00.000000000 Z
11
+ date: 2021-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: avro
@@ -19,7 +19,7 @@ dependencies:
19
19
  version: 1.7.7
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: '1.10'
22
+ version: '1.11'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,21 +29,21 @@ dependencies:
29
29
  version: 1.7.7
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: '1.10'
32
+ version: '1.11'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: excon
35
35
  requirement: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '0.45'
39
+ version: '0.71'
40
40
  type: :runtime
41
41
  prerelease: false
42
42
  version_requirements: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '0.45'
46
+ version: '0.71'
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: bundler
49
49
  requirement: !ruby/object:Gem::Requirement
@@ -163,8 +163,8 @@ executables: []
163
163
  extensions: []
164
164
  extra_rdoc_files: []
165
165
  files:
166
- - ".circleci/config.yml"
167
166
  - ".github/workflows/ruby.yml"
167
+ - ".github/workflows/stale.yml"
168
168
  - ".gitignore"
169
169
  - ".rspec"
170
170
  - CHANGELOG.md
@@ -249,8 +249,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
249
249
  - !ruby/object:Gem::Version
250
250
  version: '0'
251
251
  requirements: []
252
- rubyforge_project:
253
- rubygems_version: 2.7.6
252
+ rubygems_version: 3.1.2
254
253
  signing_key:
255
254
  specification_version: 4
256
255
  summary: A library that makes it easier to use the Avro serialization format from
data/.circleci/config.yml DELETED
@@ -1,36 +0,0 @@
1
- version: 2
2
- jobs:
3
- build:
4
- environment:
5
- CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
6
- CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
7
- docker:
8
- - image: circleci/ruby:2.6.2
9
- steps:
10
- - checkout
11
- - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
12
- - restore_cache:
13
- keys:
14
- # This branch if available
15
- - v1-dep-{{ .Branch }}-
16
- # Default branch if not
17
- - v1-dep-master-
18
- # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
19
- - v1-dep-
20
- - run: gem install bundler --no-document
21
- - run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
22
- # Save dependency cache
23
- - save_cache:
24
- key: v1-dep-{{ .Branch }}-{{ epoch }}
25
- paths:
26
- - vendor/bundle
27
- - ~/.bundle
28
- - run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
29
- - run:
30
- command: bundle exec rspec --color --require spec_helper --format progress
31
- - store_test_results:
32
- path: /tmp/circleci-test-results
33
- - store_artifacts:
34
- path: /tmp/circleci-artifacts
35
- - store_artifacts:
36
- path: /tmp/circleci-test-results