avro_turf 0.11.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e2ee4d9598bcaa2ec5738a1130fae0b19be7b5e7250f27540313b7471f97e23
4
- data.tar.gz: 0b441cb30a153958c2ea283300a1a05c26218e2a97cb807fce36f8ad9d0240da
3
+ metadata.gz: a4e9638703b961c348d06adcfa3e34ac2bd908e68f9cf7762a550ae4ef453b8a
4
+ data.tar.gz: 6d53b1f9b5e4b9e5a3aaeb13207bf350e2b56c8db138cb499643180a94a684b7
5
5
  SHA512:
6
- metadata.gz: 12779eac5c325752cfa1be34da94ef5f332490cda4ff0aef29529a00557008cecf39592396a3f3525a2a12cb67a46744800781e0da69d4bf02511f5a2284e5e7
7
- data.tar.gz: a2e4c84fb338d62296aefb8ae8c206c262d756475ec11bd9cead17bfe6015ea069ff36a891df3381385fa650cb4b5ea1584855fccc5294f5910ab34563ad973a
6
+ metadata.gz: 7045f852f20d3ddeca3256724c918ed255722a1556f662d159b805921670d0a2af3e574e3c967bee7622d5d3e05f5348d6023b250006c3651cb7b1f9cec059bf
7
+ data.tar.gz: 50ca19fb058246ba19b28472e03bedcfbc09ddcd336d46deb29dec4b406d81a65c2b0f6ca84ec36d5227da035906976af133cf757cbfc2f20abda116c8e17d8c
@@ -1,11 +1,15 @@
1
1
  name: Ruby
2
2
 
3
- on: [push]
3
+ on: [push, pull_request]
4
4
 
5
5
  jobs:
6
6
  build:
7
7
 
8
8
  runs-on: ubuntu-latest
9
+ strategy:
10
+ matrix:
11
+ ruby-version: [1.8.x, 1.9.x, 2.0.x, 2.1.x, 2.2.x,
12
+ 2.3.x, 2.4.x, 2.5.x, 2.6.x, 2.7.x, 3.0.x]
9
13
 
10
14
  steps:
11
15
  - uses: actions/checkout@v1
@@ -0,0 +1,19 @@
1
+ name: Mark stale issues and pull requests
2
+
3
+ on:
4
+ schedule:
5
+ - cron: "0 0 * * *"
6
+
7
+ jobs:
8
+ stale:
9
+
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - uses: actions/stale@v1
14
+ with:
15
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
16
+ stale-issue-message: 'Stale issue message'
17
+ stale-pr-message: 'Stale pull request message'
18
+ stale-issue-label: 'no-issue-activity'
19
+ stale-pr-label: 'no-pr-activity'
data/CHANGELOG.md CHANGED
@@ -1,7 +1,29 @@
1
- # avro_turf
1
+ # AvroTurf
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## v1.3.1
6
+
7
+ - Prevent CachedConfluentSchemaRegistry from caching the 'latest' version (#140)
8
+ - Fix issue with zero length schema cache file (#138)
9
+
10
+ ## v1.3.0
11
+
12
+ - Add support for plain user/password auth to ConfluentSchemaRegistry (#120)
13
+
14
+ ## v1.2.0
15
+
16
+ - Expose `fetch_schema`, `fetch_schema_by_id` and `register_schema` methods in `Messaging` interface (#117, #119)
17
+ - Add ability to validate message before encoding in `Messaging#encode` interface (#116, #118)
18
+
19
+ ## v1.1.0
20
+
21
+ - Compatibility with Avro v1.10.x.
22
+
23
+ ## v1.0.0
24
+
25
+ - Stop caching nested sub-schemas (#111)
26
+
5
27
  ## v0.11.0
6
28
 
7
29
  - Add proxy support (#107)
data/README.md CHANGED
@@ -16,6 +16,48 @@ These classes have been renamed to `AvroTurf::ConfluentSchemaRegistry`,
16
16
 
17
17
  The aliases for the original names will be removed in a future release.
18
18
 
19
+ ## Note about finding nested schemas
20
+
21
+ As of AvroTurf version 1.0.0, only top-level schemas that have their own .avsc file will be loaded and resolvable by the `AvroTurf::SchemaStore#find` method. This change will likely not affect most users. However, if you use `AvroTurf::SchemaStore#load_schemas!` to pre-cache all your schemas and then rely on `AvroTurf::SchemaStore#find` to access nested schemas that are not defined by their own .avsc files, your code may stop working when you upgrade to v1.0.0.
22
+
23
+ As an example, if you have a `person` schema (defined in `my/schemas/contacts/person.avsc`) that defines a nested `address` schema like this:
24
+
25
+ ```json
26
+ {
27
+ "name": "person",
28
+ "namespace": "contacts",
29
+ "type": "record",
30
+ "fields": [
31
+ {
32
+ "name": "address",
33
+ "type": {
34
+ "name": "address",
35
+ "type": "record",
36
+ "fields": [
37
+ { "name": "addr1", "type": "string" },
38
+ { "name": "addr2", "type": "string" },
39
+ { "name": "city", "type": "string" },
40
+ { "name": "zip", "type": "string" }
41
+ ]
42
+ }
43
+ }
44
+ ]
45
+ }
46
+ ```
47
+ ...this will no longer work in v1.0.0:
48
+ ```ruby
49
+ store = AvroTurf::SchemaStore.new(path: 'my/schemas')
50
+ store.load_schemas!
51
+
52
+ # Accessing 'person' is correct and works fine.
53
+ person = store.find('person', 'contacts') # my/schemas/contacts/person.avsc exists
54
+
55
+ # Trying to access 'address' raises AvroTurf::SchemaNotFoundError
56
+ address = store.find('address', 'contacts') # my/schemas/contacts/address.avsc is not found
57
+ ```
58
+
59
+ For details and context, see [this pull request](https://github.com/dasch/avro_turf/pull/111).
60
+
19
61
  ## Installation
20
62
 
21
63
  Add this line to your application's Gemfile:
@@ -136,6 +178,10 @@ data = avro.encode({ "title" => "hello, world" }, subject: 'greeting', version:
136
178
  # of the same schema version will be served by the cache.
137
179
  data = avro.encode({ "title" => "hello, world" }, schema_id: 2)
138
180
 
181
+ # A message can be validated before encoding; an invalid message raises
182
+ # Avro::SchemaValidator::ValidationError with a description of the problem.
183
+ data = avro.encode({ "titl" => "hello, world" }, schema_name: "greeting", validate: true)
184
+
139
185
  # When decoding, the schema will be fetched from the registry and cached. Subsequent
140
186
  # instances of the same schema id will be served by the cache.
141
187
  avro.decode(data) #=> { "title" => "hello, world" }
@@ -147,6 +193,20 @@ result.message #=> { "title" => "hello, world" }
147
193
  result.schema_id #=> 3
148
194
  result.writer_schema #=> #<Avro::Schema: ...>
149
195
  result.reader_schema #=> nil
196
+
197
+ # You can also work with schema through this interface:
198
+ # Fetch latest schema for subject from registry
199
+ schema, schema_id = avro.fetch_schema(subject: 'greeting')
200
+ # Fetch specific version
201
+ schema, schema_id = avro.fetch_schema(subject: 'greeting', version: 1)
202
+ # Fetch schema by id
203
+ schema, schema_id = avro.fetch_schema_by_id(3)
204
+ # Register schema fetched from store by name
205
+ schema, schema_id = avro.register_schema(schema_name: 'greeting')
206
+ # Specify namespace (same as schema_name: 'somewhere.greeting')
207
+ schema, schema_id = avro.register_schema(schema_name: 'greeting', namespace: 'somewhere')
208
+ # Customize subject under which to register schema
209
+ schema, schema_id = avro.register_schema(schema_name: 'greeting', namespace: 'somewhere', subject: 'test')
150
210
  ```
151
211
 
152
212
  ### Confluent Schema Registry Client
data/avro_turf.gemspec CHANGED
@@ -17,8 +17,8 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
- spec.add_dependency "avro", ">= 1.7.7", "< 1.10"
21
- spec.add_dependency "excon", "~> 0.45"
20
+ spec.add_dependency "avro", ">= 1.7.7", "< 1.11"
21
+ spec.add_dependency "excon", "~> 0.71"
22
22
 
23
23
  spec.add_development_dependency "bundler", "~> 2.0"
24
24
  spec.add_development_dependency "rake", "~> 13.0"
@@ -33,6 +33,8 @@ class AvroTurf::CachedConfluentSchemaRegistry
33
33
  end
34
34
 
35
35
  def subject_version(subject, version = 'latest')
36
+ return @upstream.subject_version(subject, version) if version == 'latest'
37
+
36
38
  @cache.lookup_by_version(subject, version) ||
37
39
  @cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
38
40
  end
@@ -7,6 +7,8 @@ class AvroTurf::ConfluentSchemaRegistry
7
7
  url,
8
8
  logger: Logger.new($stdout),
9
9
  proxy: nil,
10
+ user: nil,
11
+ password: nil,
10
12
  client_cert: nil,
11
13
  client_key: nil,
12
14
  client_key_pass: nil,
@@ -17,10 +19,12 @@ class AvroTurf::ConfluentSchemaRegistry
17
19
  headers = {
18
20
  "Content-Type" => CONTENT_TYPE
19
21
  }
20
- headers[:proxy] = proxy if proxy&.present?
22
+ headers[:proxy] = proxy unless proxy.nil?
21
23
  @connection = Excon.new(
22
24
  url,
23
25
  headers: headers,
26
+ user: user,
27
+ password: password,
24
28
  client_cert: client_cert,
25
29
  client_key: client_key,
26
30
  client_key_pass: client_key_pass,
@@ -2,15 +2,19 @@
2
2
  # Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
3
3
  class AvroTurf::DiskCache < AvroTurf::InMemoryCache
4
4
 
5
- def initialize(disk_path)
5
+ def initialize(disk_path, logger: Logger.new($stdout))
6
6
  super()
7
7
 
8
+ @logger = logger
9
+
8
10
  # load the write-thru cache on startup, if it exists
9
11
  @schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
10
- @schemas_by_id = JSON.parse(File.read(@schemas_by_id_path)) if File.exist?(@schemas_by_id_path)
12
+ hash = read_from_disk_cache(@schemas_by_id_path)
13
+ @schemas_by_id = hash if hash
11
14
 
12
15
  @ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
13
- @ids_by_schema = JSON.parse(File.read(@ids_by_schema_path)) if File.exist?(@ids_by_schema_path)
16
+ hash = read_from_disk_cache(@ids_by_schema_path)
17
+ @ids_by_schema = hash if hash
14
18
 
15
19
  @schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
16
20
  @schemas_by_subject_version = {}
@@ -31,12 +35,18 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
31
35
  return value
32
36
  end
33
37
 
34
- # override to include write-thru cache after storing result from upstream
38
+ # override to use a json serializable cache key
39
+ def lookup_by_schema(subject, schema)
40
+ key = "#{subject}#{schema}"
41
+ @ids_by_schema[key]
42
+ end
43
+
44
+ # override to use a json serializable cache key and update the file cache
35
45
  def store_by_schema(subject, schema, id)
36
- # must return the value from storing the result (i.e. do not return result from file write)
37
- value = super
46
+ key = "#{subject}#{schema}"
47
+ @ids_by_schema[key] = id
38
48
  File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
39
- return value
49
+ id
40
50
  end
41
51
 
42
52
  # checks instance var (in-memory cache) for schema
@@ -49,7 +59,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
49
59
 
50
60
  return schema unless schema.nil?
51
61
 
52
- hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
62
+ hash = read_from_disk_cache(@schemas_by_subject_version_path)
53
63
  if hash
54
64
  @schemas_by_subject_version = hash
55
65
  @schemas_by_subject_version[key]
@@ -63,7 +73,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
63
73
  # update instance var (in memory-cache) to match
64
74
  def store_by_version(subject, version, schema)
65
75
  key = "#{subject}#{version}"
66
- hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
76
+ hash = read_from_disk_cache(@schemas_by_subject_version_path)
67
77
  hash = if hash
68
78
  hash[key] = schema
69
79
  hash
@@ -77,6 +87,19 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
77
87
  @schemas_by_subject_version[key]
78
88
  end
79
89
 
90
+ # Parse the file from disk, if it exists and is not zero length
91
+ private def read_from_disk_cache(path)
92
+ if File.exist?(path)
93
+ if File.size(path)!=0
94
+ return JSON.parse(File.read(path))
95
+ else
96
+ # just log a message if skipping zero length file
97
+ @logger.warn "skipping JSON.parse of zero length file at #{path}"
98
+ end
99
+ end
100
+ return nil
101
+ end
102
+
80
103
  private def write_to_disk_cache(path, hash)
81
104
  File.write(path, JSON.pretty_generate(hash))
82
105
  end
@@ -17,12 +17,12 @@ class AvroTurf::InMemoryCache
17
17
  end
18
18
 
19
19
  def lookup_by_schema(subject, schema)
20
- key = subject + schema.to_s
20
+ key = [subject, schema.to_s]
21
21
  @ids_by_schema[key]
22
22
  end
23
23
 
24
24
  def store_by_schema(subject, schema, id)
25
- key = subject + schema.to_s
25
+ key = [subject, schema.to_s]
26
26
  @ids_by_schema[key] = id
27
27
  end
28
28
 
@@ -34,6 +34,8 @@ class AvroTurf
34
34
  # namespace - The String default schema namespace.
35
35
  # logger - The Logger that should be used to log information (optional).
36
36
  # proxy - Forward the request via proxy (optional).
37
+ # user - User for basic auth (optional).
38
+ # password - Password for basic auth (optional).
37
39
  # client_cert - Name of file containing client certificate (optional).
38
40
  # client_key - Name of file containing client private key to go with client_cert (optional).
39
41
  # client_key_pass - Password to go with client_key (optional).
@@ -47,6 +49,8 @@ class AvroTurf
47
49
  namespace: nil,
48
50
  logger: nil,
49
51
  proxy: nil,
52
+ user: nil,
53
+ password: nil,
50
54
  client_cert: nil,
51
55
  client_key: nil,
52
56
  client_key_pass: nil,
@@ -61,6 +65,8 @@ class AvroTurf
61
65
  registry_url,
62
66
  logger: @logger,
63
67
  proxy: proxy,
68
+ user: user,
69
+ password: password,
64
70
  client_cert: client_cert,
65
71
  client_key: client_key,
66
72
  client_key_pass: client_key_pass,
@@ -84,19 +90,26 @@ class AvroTurf
84
90
  # the data. Must match the schema used when encoding (optional).
85
91
  # schema_id - The integer id of the schema that should be used to encode
86
92
  # the data.
93
+ # validate - Boolean; when true, fully validates the message before
94
+ # encoding it. Avro::SchemaValidator::ValidationError with a
95
+ # descriptive message is raised if the message is invalid.
87
96
  #
88
97
  # Returns the encoded data as a String.
89
- def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil)
90
- schema_id, schema = if schema_id
98
+ def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil, validate: false)
99
+ schema, schema_id = if schema_id
91
100
  fetch_schema_by_id(schema_id)
92
101
  elsif subject && version
93
- fetch_schema(subject, version)
102
+ fetch_schema(subject: subject, version: version)
94
103
  elsif schema_name
95
- register_schema(subject, schema_name, namespace)
104
+ register_schema(subject: subject, schema_name: schema_name, namespace: namespace)
96
105
  else
97
106
  raise ArgumentError.new('Neither schema_name nor schema_id nor subject + version provided to determine the schema.')
98
107
  end
99
108
 
109
+ if validate
110
+ Avro::SchemaValidator.validate!(schema, message, recursive: true, encoded: false, fail_on_extra_fields: true)
111
+ end
112
+
100
113
  stream = StringIO.new
101
114
  writer = Avro::IO::DatumWriter.new(schema)
102
115
  encoder = Avro::IO::BinaryEncoder.new(stream)
@@ -111,7 +124,7 @@ class AvroTurf
111
124
  writer.write(message, encoder)
112
125
 
113
126
  stream.string
114
- rescue Excon::Error::NotFound
127
+ rescue Excon::Errors::NotFound
115
128
  if schema_id
116
129
  raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
117
130
  else
@@ -169,31 +182,31 @@ class AvroTurf
169
182
  raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
170
183
  end
171
184
 
172
- private
173
-
174
185
  # Providing subject and version to determine the schema,
175
186
  # which skips the auto registration of schema on the schema registry.
176
187
  # Fetch the schema from registry with the provided subject name and version.
177
- def fetch_schema(subject, version)
188
+ def fetch_schema(subject:, version: 'latest')
178
189
  schema_data = @registry.subject_version(subject, version)
179
190
  schema_id = schema_data.fetch('id')
180
191
  schema = Avro::Schema.parse(schema_data.fetch('schema'))
181
- [schema_id, schema]
192
+ [schema, schema_id]
182
193
  end
183
194
 
184
195
  # Fetch the schema from registry with the provided schema_id.
185
196
  def fetch_schema_by_id(schema_id)
186
- schema_json = @registry.fetch(schema_id)
187
- schema = Avro::Schema.parse(schema_json)
188
- [schema_id, schema]
197
+ schema = @schemas_by_id.fetch(schema_id) do
198
+ schema_json = @registry.fetch(schema_id)
199
+ Avro::Schema.parse(schema_json)
200
+ end
201
+ [schema, schema_id]
189
202
  end
190
203
 
191
204
  # Schemas are registered under the full name of the top level Avro record
192
205
  # type, or `subject` if it's provided.
193
- def register_schema(subject, schema_name, namespace)
206
+ def register_schema(schema_name:, subject: nil, namespace: nil)
194
207
  schema = @schema_store.find(schema_name, namespace)
195
208
  schema_id = @registry.register(subject || schema.fullname, schema)
196
- [schema_id, schema]
209
+ [schema, schema_id]
197
210
  end
198
211
  end
199
212
  end
@@ -22,7 +22,7 @@ class AvroTurf::SchemaStore
22
22
  # Still need to check whether the schema is already loaded
23
23
  return @schemas[fullname] if @schemas.key?(fullname)
24
24
 
25
- load_schema!(fullname, namespace)
25
+ load_schema!(fullname)
26
26
  end
27
27
  end
28
28
 
@@ -42,34 +42,57 @@ class AvroTurf::SchemaStore
42
42
  end
43
43
  end
44
44
 
45
- private
45
+ protected
46
46
 
47
47
  # Loads single schema
48
48
  # This method is not thread-safe; do not call it outside of a mutex synchronization routine
49
- def load_schema!(fullname, namespace = nil)
50
- *namespace, schema_name = fullname.split(".")
51
- schema_path = File.join(@path, *namespace, schema_name + ".avsc")
49
+ def load_schema!(fullname, local_schemas_cache = {})
50
+ schema_path = build_schema_path(fullname)
52
51
  schema_json = JSON.parse(File.read(schema_path))
53
- schema = Avro::Schema.real_parse(schema_json, @schemas)
54
52
 
53
+ schema = Avro::Schema.real_parse(schema_json, local_schemas_cache)
54
+
55
+ # Don't cache the parsed schema until after its fullname is validated
55
56
  if schema.respond_to?(:fullname) && schema.fullname != fullname
56
57
  raise AvroTurf::SchemaError, "expected schema `#{schema_path}' to define type `#{fullname}'"
57
58
  end
58
59
 
60
+ # Cache only this new top-level schema by its fullname. It's critical
61
+ # not to make every sub-schema resolvable at the top level here because
62
+ # multiple different avsc files may define the same sub-schema, and
63
+ # if we share the @schemas cache across all parsing contexts, the Avro
64
+ # gem will raise an Avro::SchemaParseError when parsing another avsc
65
+ # file that contains a subschema with the same fullname as one
66
+ # encountered previously in a different file:
67
+ # <Avro::SchemaParseError: The name "foo.bar" is already in use.>
68
+ # Essentially, the only schemas that should be resolvable in @schemas
69
+ # are those that have their own .avsc files on disk.
70
+ @schemas[fullname] = schema
71
+
59
72
  schema
60
73
  rescue ::Avro::SchemaParseError => e
61
74
  # This is a hack in order to figure out exactly which type was missing. The
62
75
  # Avro gem ought to provide this data directly.
63
76
  if e.to_s =~ /"([\w\.]+)" is not a schema we know about/
64
- load_schema!($1)
77
+ # Try to first resolve a referenced schema from disk.
78
+ # If this is successful, the Avro gem will have mutated the
79
+ # local_schemas_cache, adding all the new schemas it found.
80
+ load_schema!($1, local_schemas_cache)
65
81
 
66
- # Re-resolve the original schema now that the dependency has been resolved.
67
- @schemas.delete(fullname)
68
- load_schema!(fullname)
82
+ # Attempt to re-parse the original schema now that the dependency
83
+ # has been resolved and use the now-updated local_schemas_cache to
84
+ # pick up where we left off.
85
+ local_schemas_cache.delete(fullname)
86
+ load_schema!(fullname, local_schemas_cache)
69
87
  else
70
88
  raise
71
89
  end
72
90
  rescue Errno::ENOENT, Errno::ENAMETOOLONG
73
91
  raise AvroTurf::SchemaNotFoundError, "could not find Avro schema at `#{schema_path}'"
74
92
  end
93
+
94
+ def build_schema_path(fullname)
95
+ *namespace, schema_name = fullname.split(".")
96
+ schema_path = File.join(@path, *namespace, schema_name + ".avsc")
97
+ end
75
98
  end
@@ -1,3 +1,3 @@
1
1
  class AvroTurf
2
- VERSION = "0.11.0"
2
+ VERSION = "1.3.1"
3
3
  end
@@ -3,6 +3,8 @@ require 'avro_turf/confluent_schema_registry'
3
3
  require 'avro_turf/test/fake_confluent_schema_registry_server'
4
4
 
5
5
  describe AvroTurf::ConfluentSchemaRegistry do
6
+ let(:user) { "abc" }
7
+ let(:password) { "xxyyzz" }
6
8
  let(:client_cert) { "test client cert" }
7
9
  let(:client_key) { "test client key" }
8
10
  let(:client_key_pass) { "test client key password" }
@@ -18,4 +20,14 @@ describe AvroTurf::ConfluentSchemaRegistry do
18
20
  )
19
21
  }
20
22
  end
23
+
24
+ it_behaves_like "a confluent schema registry client" do
25
+ let(:registry) {
26
+ described_class.new(
27
+ registry_url,
28
+ user: user,
29
+ password: password,
30
+ )
31
+ }
32
+ end
21
33
  end
@@ -4,7 +4,8 @@ require 'avro_turf/test/fake_confluent_schema_registry_server'
4
4
 
5
5
  describe AvroTurf::CachedConfluentSchemaRegistry do
6
6
  let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
7
- let(:cache) { AvroTurf::DiskCache.new("spec/cache")}
7
+ let(:logger_io) { StringIO.new }
8
+ let(:cache) { AvroTurf::DiskCache.new("spec/cache", logger: Logger.new(logger_io))}
8
9
  let(:registry) { described_class.new(upstream, cache: cache) }
9
10
  let(:id) { rand(999) }
10
11
  let(:schema) do
@@ -80,6 +81,40 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
80
81
  end
81
82
  end
82
83
 
84
+ describe "#fetch (zero length cache file)" do
85
+ let(:cache_after) do
86
+ {
87
+ "#{id}" => "#{schema}"
88
+ }
89
+ end
90
+
91
+ before do
92
+ # setup the disk cache with a zero length file
93
+ File.write(File.join("spec/cache", "schemas_by_id.json"), '')
94
+ end
95
+
96
+ it "skips zero length disk cache" do
97
+ # multiple calls return same result, with only one upstream call
98
+ allow(upstream).to receive(:fetch).with(id).and_return(schema)
99
+ expect(registry.fetch(id)).to eq(schema)
100
+ expect(registry.fetch(id)).to eq(schema)
101
+ expect(upstream).to have_received(:fetch).exactly(1).times
102
+ expect(load_cache("schemas_by_id.json")).to eq cache_after
103
+ expect(logger_io.string).to include("zero length file at spec/cache/schemas_by_id.json")
104
+ end
105
+ end
106
+
107
+ describe "#fetch (corrupt cache file)" do
108
+ before do
109
+ # setup the disk cache with a corrupt file (i.e. not json)
110
+ File.write(File.join("spec/cache", "schemas_by_id.json"), 'NOTJSON')
111
+ end
112
+
113
+ it "raises error on corrupt cache file" do
114
+ expect{registry.fetch(id)}.to raise_error(JSON::ParserError, /unexpected token/)
115
+ end
116
+ end
117
+
83
118
  describe "#register" do
84
119
  let(:subject_name) { "a_subject" }
85
120
  let(:cache_before) do
@@ -120,6 +155,41 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
120
155
  end
121
156
  end
122
157
 
158
+ describe "#register (zero length cache file)" do
159
+ let(:subject_name) { "a_subject" }
160
+ let(:cache_after) do
161
+ {
162
+ "#{subject_name}#{schema}" => id
163
+ }
164
+ end
165
+
166
+ before do
167
+ # setup the disk cache with a zero length file
168
+ File.write(File.join("spec/cache", "ids_by_schema.json"), '')
169
+ end
170
+
171
+ it "skips zero length disk cache" do
172
+ # multiple calls return same result, with only one upstream call
173
+ allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
174
+ expect(registry.register(subject_name, schema)).to eq(id)
175
+ expect(registry.register(subject_name, schema)).to eq(id)
176
+ expect(upstream).to have_received(:register).exactly(1).times
177
+ expect(load_cache("ids_by_schema.json")).to eq cache_after
178
+ expect(logger_io.string).to include("zero length file at spec/cache/ids_by_schema.json")
179
+ end
180
+ end
181
+
182
+ describe "#register (corrupt cache file)" do
183
+ before do
184
+ # setup the disk cache with a corrupt file (i.e. not json)
185
+ File.write(File.join("spec/cache", "ids_by_schema.json"), 'NOTJSON')
186
+ end
187
+
188
+ it "raises error on corrupt cache file" do
189
+ expect{registry.register(subject_name, schema)}.to raise_error(JSON::ParserError, /unexpected token/)
190
+ end
191
+ end
192
+
123
193
  describe "#subject_version" do
124
194
  it "writes thru to disk cache" do
125
195
  # multiple calls return same result, with zero upstream calls
@@ -297,4 +297,106 @@ describe AvroTurf::Messaging do
297
297
  end
298
298
  end
299
299
  end
300
+
301
+ context "validating" do
302
+ subject(:encode){ avro.encode(message, schema_name: "person", validate: true) }
303
+
304
+ context "for correct message" do
305
+ it { expect { encode }.not_to raise_error }
306
+ end
307
+
308
+ context "when message has wrong type" do
309
+ let(:message) { { "full_name" => 123 } }
310
+
311
+ it { expect { encode }.to raise_error(Avro::SchemaValidator::ValidationError, /\.full_name expected type string, got int/) }
312
+ end
313
+
314
+ context "when message contains extra fields (typo in key)" do
315
+ let(:message) { { "fulll_name" => "John Doe" } }
316
+
317
+ it { expect { encode }.to raise_error(Avro::SchemaValidator::ValidationError, /extra field 'fulll_name'/) }
318
+ end
319
+ end
320
+
321
+ context 'fetching and registering schema' do
322
+ let(:schema_store) { AvroTurf::SchemaStore.new(path: "spec/schemas") }
323
+
324
+ let(:registry) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
325
+
326
+ let(:avro) do
327
+ AvroTurf::Messaging.new(
328
+ registry: registry,
329
+ schema_store: schema_store,
330
+ logger: logger
331
+ )
332
+ end
333
+
334
+ let(:schema_id) { 234 }
335
+
336
+ context 'using fetch_schema' do
337
+ subject { avro.fetch_schema(subject: subj, version: version) }
338
+
339
+ let(:subj) { 'subject' }
340
+
341
+ let(:version) { 'version' }
342
+
343
+ let(:response) { {'id' => schema_id, 'schema' => schema_json} }
344
+
345
+ before do
346
+ allow(registry).to receive(:subject_version).with(subj, version).and_return(response)
347
+ end
348
+
349
+ it 'gets schema from registry' do
350
+ expect(subject).to eq([schema, schema_id])
351
+ end
352
+ end
353
+
354
+ context 'using fetch_schema_by_id' do
355
+ subject { avro.fetch_schema_by_id(schema_id) }
356
+
357
+ before do
358
+ allow(registry).to receive(:fetch).with(schema_id).and_return(schema_json)
359
+ end
360
+
361
+ it 'gets schema from registry' do
362
+ expect(subject).to eq([schema, schema_id])
363
+ end
364
+ end
365
+
366
+ context 'using register_schema' do
367
+ let(:schema_name) { 'schema_name' }
368
+
369
+ let(:namespace) { 'namespace' }
370
+
371
+ before do
372
+ allow(schema_store).to receive(:find).with(schema_name, namespace).and_return(schema)
373
+ end
374
+
375
+ context 'when subject is not set' do
376
+ subject { avro.register_schema(schema_name: schema_name, namespace: namespace) }
377
+
378
+ before do
379
+ allow(registry).to receive(:register).with(schema.fullname, schema).and_return(schema_id)
380
+ end
381
+
382
+ it 'registers schema in registry' do
383
+ expect(subject).to eq([schema, schema_id])
384
+ end
385
+ end
386
+
387
+ context 'when subject is set' do
388
+ subject { avro.register_schema(schema_name: schema_name, namespace: namespace, subject: subj) }
389
+
390
+ let(:subj) { 'subject' }
391
+
392
+ before do
393
+ allow(registry).to receive(:register).with(subj, schema).and_return(schema_id)
394
+ end
395
+
396
+ it 'registers schema in registry' do
397
+ expect(subject).to eq([schema, schema_id])
398
+ end
399
+ end
400
+ end
401
+ end
300
402
  end
@@ -198,6 +198,104 @@ describe AvroTurf::SchemaStore do
198
198
  expect(schema.fullname).to eq "person"
199
199
  end
200
200
 
201
+ # This test would fail under avro_turf <= v0.11.0
202
+ it "does NOT cache *nested* schemas in memory" do
203
+ FileUtils.mkdir_p("spec/schemas/test")
204
+
205
+ define_schema "test/person.avsc", <<-AVSC
206
+ {
207
+ "name": "person",
208
+ "namespace": "test",
209
+ "type": "record",
210
+ "fields": [
211
+ {
212
+ "name": "address",
213
+ "type": {
214
+ "name": "address",
215
+ "type": "record",
216
+ "fields": [
217
+ { "name": "addr1", "type": "string" },
218
+ { "name": "addr2", "type": "string" },
219
+ { "name": "city", "type": "string" },
220
+ { "name": "zip", "type": "string" }
221
+ ]
222
+ }
223
+ }
224
+ ]
225
+ }
226
+ AVSC
227
+
228
+ schema = store.find('person', 'test')
229
+ expect(schema.fullname).to eq "test.person"
230
+
231
+ expect { store.find('address', 'test') }.
232
+ to raise_error(AvroTurf::SchemaNotFoundError)
233
+ end
234
+
235
+ # This test would fail under avro_turf <= v0.11.0
236
+ it "allows two different avsc files to define nested sub-schemas with the same fullname" do
237
+ FileUtils.mkdir_p("spec/schemas/test")
238
+
239
+ define_schema "test/person.avsc", <<-AVSC
240
+ {
241
+ "name": "person",
242
+ "namespace": "test",
243
+ "type": "record",
244
+ "fields": [
245
+ {
246
+ "name": "location",
247
+ "type": {
248
+ "name": "location",
249
+ "type": "record",
250
+ "fields": [
251
+ { "name": "city", "type": "string" },
252
+ { "name": "zipcode", "type": "string" }
253
+ ]
254
+ }
255
+ }
256
+ ]
257
+ }
258
+ AVSC
259
+
260
+ define_schema "test/company.avsc", <<-AVSC
261
+ {
262
+ "name": "company",
263
+ "namespace": "test",
264
+ "type": "record",
265
+ "fields": [
266
+ {
267
+ "name": "headquarters",
268
+ "type": {
269
+ "name": "location",
270
+ "type": "record",
271
+ "fields": [
272
+ { "name": "city", "type": "string" },
273
+ { "name": "postcode", "type": "string" }
274
+ ]
275
+ }
276
+ }
277
+ ]
278
+ }
279
+ AVSC
280
+
281
+ company = nil
282
+ person = store.find('person', 'test')
283
+
284
+ # This should *NOT* raise the error:
285
+ # #<Avro::SchemaParseError: The name "test.location" is already in use.>
286
+ expect { company = store.find('company', 'test') }.not_to raise_error
287
+
288
+ person_location_field = person.fields_hash['location']
289
+ expect(person_location_field.type.name).to eq('location')
290
+ expect(person_location_field.type.fields_hash).to include('zipcode')
291
+ expect(person_location_field.type.fields_hash).not_to include('postcode')
292
+
293
+ company_headquarters_field = company.fields_hash['headquarters']
294
+ expect(company_headquarters_field.type.name).to eq('location')
295
+ expect(company_headquarters_field.type.fields_hash).to include('postcode')
296
+ expect(company_headquarters_field.type.fields_hash).not_to include('zipcode')
297
+ end
298
+
201
299
  it "is thread safe" do
202
300
  define_schema "address.avsc", <<-AVSC
203
301
  {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro_turf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-10-25 00:00:00.000000000 Z
11
+ date: 2021-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: avro
@@ -19,7 +19,7 @@ dependencies:
19
19
  version: 1.7.7
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: '1.10'
22
+ version: '1.11'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,21 +29,21 @@ dependencies:
29
29
  version: 1.7.7
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: '1.10'
32
+ version: '1.11'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: excon
35
35
  requirement: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '0.45'
39
+ version: '0.71'
40
40
  type: :runtime
41
41
  prerelease: false
42
42
  version_requirements: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '0.45'
46
+ version: '0.71'
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: bundler
49
49
  requirement: !ruby/object:Gem::Requirement
@@ -163,8 +163,8 @@ executables: []
163
163
  extensions: []
164
164
  extra_rdoc_files: []
165
165
  files:
166
- - ".circleci/config.yml"
167
166
  - ".github/workflows/ruby.yml"
167
+ - ".github/workflows/stale.yml"
168
168
  - ".gitignore"
169
169
  - ".rspec"
170
170
  - CHANGELOG.md
@@ -249,8 +249,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
249
249
  - !ruby/object:Gem::Version
250
250
  version: '0'
251
251
  requirements: []
252
- rubyforge_project:
253
- rubygems_version: 2.7.6
252
+ rubygems_version: 3.1.2
254
253
  signing_key:
255
254
  specification_version: 4
256
255
  summary: A library that makes it easier to use the Avro serialization format from
data/.circleci/config.yml DELETED
@@ -1,36 +0,0 @@
1
- version: 2
2
- jobs:
3
- build:
4
- environment:
5
- CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
6
- CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
7
- docker:
8
- - image: circleci/ruby:2.6.2
9
- steps:
10
- - checkout
11
- - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
12
- - restore_cache:
13
- keys:
14
- # This branch if available
15
- - v1-dep-{{ .Branch }}-
16
- # Default branch if not
17
- - v1-dep-master-
18
- # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
19
- - v1-dep-
20
- - run: gem install bundler --no-document
21
- - run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
22
- # Save dependency cache
23
- - save_cache:
24
- key: v1-dep-{{ .Branch }}-{{ epoch }}
25
- paths:
26
- - vendor/bundle
27
- - ~/.bundle
28
- - run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
29
- - run:
30
- command: bundle exec rspec --color --require spec_helper --format progress
31
- - store_test_results:
32
- path: /tmp/circleci-test-results
33
- - store_artifacts:
34
- path: /tmp/circleci-artifacts
35
- - store_artifacts:
36
- path: /tmp/circleci-test-results