avro_turf 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ccc550653b8422953e55c1319f37f478d1851afab78b54c716ba23fb0e59af17
4
- data.tar.gz: a5f4721aa89807ee5b401c9a55d69d030cc2c0b7d54764b75e350f50aec9c118
3
+ metadata.gz: a4e9638703b961c348d06adcfa3e34ac2bd908e68f9cf7762a550ae4ef453b8a
4
+ data.tar.gz: 6d53b1f9b5e4b9e5a3aaeb13207bf350e2b56c8db138cb499643180a94a684b7
5
5
  SHA512:
6
- metadata.gz: 8e8e81e7e00c1fb40d43bc8ef03c64af8b4d79c921d5eaac9b32ea0039e4c877956612e98061948d7dbc0b88b320a96858b7d31d6f9f9b7bd30bead3da985035
7
- data.tar.gz: 3301e480eb51017cfd58fad552afc2b7c60b44545518fcaa8f892f90c15c7c03eb176e56af1447692d660c912cba1b22a20044f15d23d7ba949af3d075eed23b
6
+ metadata.gz: 7045f852f20d3ddeca3256724c918ed255722a1556f662d159b805921670d0a2af3e574e3c967bee7622d5d3e05f5348d6023b250006c3651cb7b1f9cec059bf
7
+ data.tar.gz: 50ca19fb058246ba19b28472e03bedcfbc09ddcd336d46deb29dec4b406d81a65c2b0f6ca84ec36d5227da035906976af133cf757cbfc2f20abda116c8e17d8c
@@ -6,6 +6,10 @@ jobs:
6
6
  build:
7
7
 
8
8
  runs-on: ubuntu-latest
9
+ strategy:
10
+ matrix:
11
+ ruby-version: [1.8.x, 1.9.x, 2.0.x, 2.1.x, 2.2.x,
12
+ 2.3.x, 2.4.x, 2.5.x, 2.6.x, 2.7.x, 3.0.x]
9
13
 
10
14
  steps:
11
15
  - uses: actions/checkout@v1
data/CHANGELOG.md CHANGED
@@ -2,6 +2,11 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## v1.3.1
6
+
7
+ - Prevent CachedConfluentSchemaRegistry from caching the 'latest' version (#140)
8
+ - Fix issue with zero length schema cache file (#138)
9
+
5
10
  ## v1.3.0
6
11
 
7
12
  - Add support for plain user/password auth to ConfluentSchemaRegistry (#120)
@@ -33,6 +33,8 @@ class AvroTurf::CachedConfluentSchemaRegistry
33
33
  end
34
34
 
35
35
  def subject_version(subject, version = 'latest')
36
+ return @upstream.subject_version(subject, version) if version == 'latest'
37
+
36
38
  @cache.lookup_by_version(subject, version) ||
37
39
  @cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
38
40
  end
@@ -19,7 +19,7 @@ class AvroTurf::ConfluentSchemaRegistry
19
19
  headers = {
20
20
  "Content-Type" => CONTENT_TYPE
21
21
  }
22
- headers[:proxy] = proxy if proxy&.present?
22
+ headers[:proxy] = proxy unless proxy.nil?
23
23
  @connection = Excon.new(
24
24
  url,
25
25
  headers: headers,
@@ -2,15 +2,19 @@
2
2
  # Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
3
3
  class AvroTurf::DiskCache < AvroTurf::InMemoryCache
4
4
 
5
- def initialize(disk_path)
5
+ def initialize(disk_path, logger: Logger.new($stdout))
6
6
  super()
7
7
 
8
+ @logger = logger
9
+
8
10
  # load the write-thru cache on startup, if it exists
9
11
  @schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
10
- @schemas_by_id = JSON.parse(File.read(@schemas_by_id_path)) if File.exist?(@schemas_by_id_path)
12
+ hash = read_from_disk_cache(@schemas_by_id_path)
13
+ @schemas_by_id = hash if hash
11
14
 
12
15
  @ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
13
- @ids_by_schema = JSON.parse(File.read(@ids_by_schema_path)) if File.exist?(@ids_by_schema_path)
16
+ hash = read_from_disk_cache(@ids_by_schema_path)
17
+ @ids_by_schema = hash if hash
14
18
 
15
19
  @schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
16
20
  @schemas_by_subject_version = {}
@@ -31,12 +35,18 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
31
35
  return value
32
36
  end
33
37
 
34
- # override to include write-thru cache after storing result from upstream
38
+ # override to use a json serializable cache key
39
+ def lookup_by_schema(subject, schema)
40
+ key = "#{subject}#{schema}"
41
+ @ids_by_schema[key]
42
+ end
43
+
44
+ # override to use a json serializable cache key and update the file cache
35
45
  def store_by_schema(subject, schema, id)
36
- # must return the value from storing the result (i.e. do not return result from file write)
37
- value = super
46
+ key = "#{subject}#{schema}"
47
+ @ids_by_schema[key] = id
38
48
  File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
39
- return value
49
+ id
40
50
  end
41
51
 
42
52
  # checks instance var (in-memory cache) for schema
@@ -49,7 +59,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
49
59
 
50
60
  return schema unless schema.nil?
51
61
 
52
- hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
62
+ hash = read_from_disk_cache(@schemas_by_subject_version_path)
53
63
  if hash
54
64
  @schemas_by_subject_version = hash
55
65
  @schemas_by_subject_version[key]
@@ -63,7 +73,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
63
73
  # update instance var (in memory-cache) to match
64
74
  def store_by_version(subject, version, schema)
65
75
  key = "#{subject}#{version}"
66
- hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
76
+ hash = read_from_disk_cache(@schemas_by_subject_version_path)
67
77
  hash = if hash
68
78
  hash[key] = schema
69
79
  hash
@@ -77,6 +87,19 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
77
87
  @schemas_by_subject_version[key]
78
88
  end
79
89
 
90
+ # Parse the file from disk, if it exists and is not zero length
91
+ private def read_from_disk_cache(path)
92
+ if File.exist?(path)
93
+ if File.size(path)!=0
94
+ return JSON.parse(File.read(path))
95
+ else
96
+ # just log a message if skipping zero length file
97
+ @logger.warn "skipping JSON.parse of zero length file at #{path}"
98
+ end
99
+ end
100
+ return nil
101
+ end
102
+
80
103
  private def write_to_disk_cache(path, hash)
81
104
  File.write(path, JSON.pretty_generate(hash))
82
105
  end
@@ -17,12 +17,12 @@ class AvroTurf::InMemoryCache
17
17
  end
18
18
 
19
19
  def lookup_by_schema(subject, schema)
20
- key = subject + schema.to_s
20
+ key = [subject, schema.to_s]
21
21
  @ids_by_schema[key]
22
22
  end
23
23
 
24
24
  def store_by_schema(subject, schema, id)
25
- key = subject + schema.to_s
25
+ key = [subject, schema.to_s]
26
26
  @ids_by_schema[key] = id
27
27
  end
28
28
 
@@ -124,7 +124,7 @@ class AvroTurf
124
124
  writer.write(message, encoder)
125
125
 
126
126
  stream.string
127
- rescue Excon::Error::NotFound
127
+ rescue Excon::Errors::NotFound
128
128
  if schema_id
129
129
  raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
130
130
  else
@@ -194,8 +194,10 @@ class AvroTurf
194
194
 
195
195
  # Fetch the schema from registry with the provided schema_id.
196
196
  def fetch_schema_by_id(schema_id)
197
- schema_json = @registry.fetch(schema_id)
198
- schema = Avro::Schema.parse(schema_json)
197
+ schema = @schemas_by_id.fetch(schema_id) do
198
+ schema_json = @registry.fetch(schema_id)
199
+ Avro::Schema.parse(schema_json)
200
+ end
199
201
  [schema, schema_id]
200
202
  end
201
203
 
@@ -22,7 +22,7 @@ class AvroTurf::SchemaStore
22
22
  # Still need to check is the schema already loaded
23
23
  return @schemas[fullname] if @schemas.key?(fullname)
24
24
 
25
- load_schema!(fullname, namespace)
25
+ load_schema!(fullname)
26
26
  end
27
27
  end
28
28
 
@@ -42,13 +42,12 @@ class AvroTurf::SchemaStore
42
42
  end
43
43
  end
44
44
 
45
- private
45
+ protected
46
46
 
47
47
  # Loads single schema
48
48
  # Such method is not thread-safe, do not call it of from mutex synchronization routine
49
- def load_schema!(fullname, namespace = nil, local_schemas_cache = {})
50
- *namespace, schema_name = fullname.split(".")
51
- schema_path = File.join(@path, *namespace, schema_name + ".avsc")
49
+ def load_schema!(fullname, local_schemas_cache = {})
50
+ schema_path = build_schema_path(fullname)
52
51
  schema_json = JSON.parse(File.read(schema_path))
53
52
 
54
53
  schema = Avro::Schema.real_parse(schema_json, local_schemas_cache)
@@ -78,17 +77,22 @@ class AvroTurf::SchemaStore
78
77
  # Try to first resolve a referenced schema from disk.
79
78
  # If this is successful, the Avro gem will have mutated the
80
79
  # local_schemas_cache, adding all the new schemas it found.
81
- load_schema!($1, nil, local_schemas_cache)
80
+ load_schema!($1, local_schemas_cache)
82
81
 
83
82
  # Attempt to re-parse the original schema now that the dependency
84
83
  # has been resolved and use the now-updated local_schemas_cache to
85
84
  # pick up where we left off.
86
85
  local_schemas_cache.delete(fullname)
87
- load_schema!(fullname, nil, local_schemas_cache)
86
+ load_schema!(fullname, local_schemas_cache)
88
87
  else
89
88
  raise
90
89
  end
91
90
  rescue Errno::ENOENT, Errno::ENAMETOOLONG
92
91
  raise AvroTurf::SchemaNotFoundError, "could not find Avro schema at `#{schema_path}'"
93
92
  end
93
+
94
+ def build_schema_path(fullname)
95
+ *namespace, schema_name = fullname.split(".")
96
+ schema_path = File.join(@path, *namespace, schema_name + ".avsc")
97
+ end
94
98
  end
@@ -1,3 +1,3 @@
1
1
  class AvroTurf
2
- VERSION = "1.3.0"
2
+ VERSION = "1.3.1"
3
3
  end
@@ -4,7 +4,8 @@ require 'avro_turf/test/fake_confluent_schema_registry_server'
4
4
 
5
5
  describe AvroTurf::CachedConfluentSchemaRegistry do
6
6
  let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
7
- let(:cache) { AvroTurf::DiskCache.new("spec/cache")}
7
+ let(:logger_io) { StringIO.new }
8
+ let(:cache) { AvroTurf::DiskCache.new("spec/cache", logger: Logger.new(logger_io))}
8
9
  let(:registry) { described_class.new(upstream, cache: cache) }
9
10
  let(:id) { rand(999) }
10
11
  let(:schema) do
@@ -80,6 +81,40 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
80
81
  end
81
82
  end
82
83
 
84
+ describe "#fetch (zero length cache file)" do
85
+ let(:cache_after) do
86
+ {
87
+ "#{id}" => "#{schema}"
88
+ }
89
+ end
90
+
91
+ before do
92
+ # setup the disk cache with a zero length file
93
+ File.write(File.join("spec/cache", "schemas_by_id.json"), '')
94
+ end
95
+
96
+ it "skips zero length disk cache" do
97
+ # multiple calls return same result, with only one upstream call
98
+ allow(upstream).to receive(:fetch).with(id).and_return(schema)
99
+ expect(registry.fetch(id)).to eq(schema)
100
+ expect(registry.fetch(id)).to eq(schema)
101
+ expect(upstream).to have_received(:fetch).exactly(1).times
102
+ expect(load_cache("schemas_by_id.json")).to eq cache_after
103
+ expect(logger_io.string).to include("zero length file at spec/cache/schemas_by_id.json")
104
+ end
105
+ end
106
+
107
+ describe "#fetch (corrupt cache file)" do
108
+ before do
109
+ # setup the disk cache with a corrupt file (i.e. not json)
110
+ File.write(File.join("spec/cache", "schemas_by_id.json"), 'NOTJSON')
111
+ end
112
+
113
+ it "raises error on corrupt cache file" do
114
+ expect{registry.fetch(id)}.to raise_error(JSON::ParserError, /unexpected token/)
115
+ end
116
+ end
117
+
83
118
  describe "#register" do
84
119
  let(:subject_name) { "a_subject" }
85
120
  let(:cache_before) do
@@ -120,6 +155,41 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
120
155
  end
121
156
  end
122
157
 
158
+ describe "#register (zero length cache file)" do
159
+ let(:subject_name) { "a_subject" }
160
+ let(:cache_after) do
161
+ {
162
+ "#{subject_name}#{schema}" => id
163
+ }
164
+ end
165
+
166
+ before do
167
+ # setup the disk cache with a zero length file
168
+ File.write(File.join("spec/cache", "ids_by_schema.json"), '')
169
+ end
170
+
171
+ it "skips zero length disk cache" do
172
+ # multiple calls return same result, with only one upstream call
173
+ allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
174
+ expect(registry.register(subject_name, schema)).to eq(id)
175
+ expect(registry.register(subject_name, schema)).to eq(id)
176
+ expect(upstream).to have_received(:register).exactly(1).times
177
+ expect(load_cache("ids_by_schema.json")).to eq cache_after
178
+ expect(logger_io.string).to include("zero length file at spec/cache/ids_by_schema.json")
179
+ end
180
+ end
181
+
182
+ describe "#register (corrupt cache file)" do
183
+ before do
184
+ # setup the disk cache with a corrupt file (i.e. not json)
185
+ File.write(File.join("spec/cache", "ids_by_schema.json"), 'NOTJSON')
186
+ end
187
+
188
+ it "raises error on corrupt cache file" do
189
+ expect{registry.register(subject_name, schema)}.to raise_error(JSON::ParserError, /unexpected token/)
190
+ end
191
+ end
192
+
123
193
  describe "#subject_version" do
124
194
  it "writes thru to disk cache" do
125
195
  # multiple calls return same result, with zero upstream calls
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro_turf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-10 00:00:00.000000000 Z
11
+ date: 2021-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: avro
@@ -156,14 +156,13 @@ dependencies:
156
156
  - - ">="
157
157
  - !ruby/object:Gem::Version
158
158
  version: '0'
159
- description:
159
+ description:
160
160
  email:
161
161
  - dasch@zendesk.com
162
162
  executables: []
163
163
  extensions: []
164
164
  extra_rdoc_files: []
165
165
  files:
166
- - ".circleci/config.yml"
167
166
  - ".github/workflows/ruby.yml"
168
167
  - ".github/workflows/stale.yml"
169
168
  - ".gitignore"
@@ -251,7 +250,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
251
250
  version: '0'
252
251
  requirements: []
253
252
  rubygems_version: 3.1.2
254
- signing_key:
253
+ signing_key:
255
254
  specification_version: 4
256
255
  summary: A library that makes it easier to use the Avro serialization format from
257
256
  Ruby
data/.circleci/config.yml DELETED
@@ -1,36 +0,0 @@
1
- version: 2
2
- jobs:
3
- build:
4
- environment:
5
- CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
6
- CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
7
- docker:
8
- - image: circleci/ruby:2.6.2
9
- steps:
10
- - checkout
11
- - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
12
- - restore_cache:
13
- keys:
14
- # This branch if available
15
- - v1-dep-{{ .Branch }}-
16
- # Default branch if not
17
- - v1-dep-master-
18
- # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
19
- - v1-dep-
20
- - run: gem install bundler --no-document
21
- - run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
22
- # Save dependency cache
23
- - save_cache:
24
- key: v1-dep-{{ .Branch }}-{{ epoch }}
25
- paths:
26
- - vendor/bundle
27
- - ~/.bundle
28
- - run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
29
- - run:
30
- command: bundle exec rspec --color --require spec_helper --format progress
31
- - store_test_results:
32
- path: /tmp/circleci-test-results
33
- - store_artifacts:
34
- path: /tmp/circleci-artifacts
35
- - store_artifacts:
36
- path: /tmp/circleci-test-results