avro_turf 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: ccc550653b8422953e55c1319f37f478d1851afab78b54c716ba23fb0e59af17
- data.tar.gz: a5f4721aa89807ee5b401c9a55d69d030cc2c0b7d54764b75e350f50aec9c118
+ metadata.gz: a4e9638703b961c348d06adcfa3e34ac2bd908e68f9cf7762a550ae4ef453b8a
+ data.tar.gz: 6d53b1f9b5e4b9e5a3aaeb13207bf350e2b56c8db138cb499643180a94a684b7
  SHA512:
- metadata.gz: 8e8e81e7e00c1fb40d43bc8ef03c64af8b4d79c921d5eaac9b32ea0039e4c877956612e98061948d7dbc0b88b320a96858b7d31d6f9f9b7bd30bead3da985035
- data.tar.gz: 3301e480eb51017cfd58fad552afc2b7c60b44545518fcaa8f892f90c15c7c03eb176e56af1447692d660c912cba1b22a20044f15d23d7ba949af3d075eed23b
+ metadata.gz: 7045f852f20d3ddeca3256724c918ed255722a1556f662d159b805921670d0a2af3e574e3c967bee7622d5d3e05f5348d6023b250006c3651cb7b1f9cec059bf
+ data.tar.gz: 50ca19fb058246ba19b28472e03bedcfbc09ddcd336d46deb29dec4b406d81a65c2b0f6ca84ec36d5227da035906976af133cf757cbfc2f20abda116c8e17d8c
@@ -6,6 +6,10 @@ jobs:
  build:
 
  runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ ruby-version: [1.8.x, 1.9.x, 2.0.x, 2.1.x, 2.2.x,
+ 2.3.x, 2.4.x, 2.5.x, 2.6.x, 2.7.x, 3.0.x]
 
  steps:
  - uses: actions/checkout@v1
data/CHANGELOG.md CHANGED
@@ -2,6 +2,11 @@
 
  ## Unreleased
 
+ ## v1.3.1
+
+ - Prevent CachedConfluentSchemaRegistry from caching the 'latest' version (#140)
+ - Fix issue with zero length schema cache file (#138)
+
  ## v1.3.0
 
  - Add support for plain user/password auth to ConfluentSchemaRegistry (#120)
@@ -33,6 +33,8 @@ class AvroTurf::CachedConfluentSchemaRegistry
  end
 
  def subject_version(subject, version = 'latest')
+ return @upstream.subject_version(subject, version) if version == 'latest'
+
  @cache.lookup_by_version(subject, version) ||
  @cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
  end
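A minimal sketch of the new behaviour, assuming an upstream registry and cache wired together as in the spec further down (subject name and version are hypothetical): pinned versions are still served from the cache, while 'latest' is always resolved upstream, since its answer changes whenever a new version is registered.

  registry = AvroTurf::CachedConfluentSchemaRegistry.new(upstream, cache: cache)
  registry.subject_version("a_subject", 3)         # first call goes upstream, result is cached
  registry.subject_version("a_subject", 3)         # served from the cache
  registry.subject_version("a_subject", "latest")  # always delegated to the upstream registry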
@@ -19,7 +19,7 @@ class AvroTurf::ConfluentSchemaRegistry
  headers = {
  "Content-Type" => CONTENT_TYPE
  }
- headers[:proxy] = proxy if proxy&.present?
+ headers[:proxy] = proxy unless proxy.nil?
  @connection = Excon.new(
  url,
  headers: headers,
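The dropped check relied on #present?, which core Ruby does not define (it comes from ActiveSupport), so passing a proxy string without ActiveSupport loaded could raise NoMethodError; the replacement guard is plain Ruby. A small sketch with a hypothetical proxy URL:

  proxy = "http://proxy.example.com:3128"      # hypothetical value
  headers = {}
  headers[:proxy] = proxy unless proxy.nil?    # no ActiveSupport needed
  # with 1.3.0, proxy&.present? would raise NoMethodError here unless ActiveSupport is loaded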
@@ -2,15 +2,19 @@
  # Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
  class AvroTurf::DiskCache < AvroTurf::InMemoryCache
 
- def initialize(disk_path)
+ def initialize(disk_path, logger: Logger.new($stdout))
  super()
 
+ @logger = logger
+
  # load the write-thru cache on startup, if it exists
  @schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
- @schemas_by_id = JSON.parse(File.read(@schemas_by_id_path)) if File.exist?(@schemas_by_id_path)
+ hash = read_from_disk_cache(@schemas_by_id_path)
+ @schemas_by_id = hash if hash
 
  @ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
- @ids_by_schema = JSON.parse(File.read(@ids_by_schema_path)) if File.exist?(@ids_by_schema_path)
+ hash = read_from_disk_cache(@ids_by_schema_path)
+ @ids_by_schema = hash if hash
 
  @schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
  @schemas_by_subject_version = {}
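The DiskCache constructor now accepts an injectable logger (defaulting to stdout); the zero-length-file warning introduced below is written to it. A short usage sketch, mirroring the spec further down:

  require "logger"
  require "stringio"

  log_io = StringIO.new                        # capture warnings in memory instead of stdout
  cache  = AvroTurf::DiskCache.new("spec/cache", logger: Logger.new(log_io))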
@@ -31,12 +35,18 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
  return value
  end
 
- # override to include write-thru cache after storing result from upstream
+ # override to use a json serializable cache key
+ def lookup_by_schema(subject, schema)
+ key = "#{subject}#{schema}"
+ @ids_by_schema[key]
+ end
+
+ # override to use a json serializable cache key and update the file cache
  def store_by_schema(subject, schema, id)
- # must return the value from storing the result (i.e. do not return result from file write)
- value = super
+ key = "#{subject}#{schema}"
+ @ids_by_schema[key] = id
  File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
- return value
+ id
  end
 
  # checks instance var (in-memory cache) for schema
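DiskCache flattens subject and schema into a single string key because @ids_by_schema is persisted as a JSON object and JSON object keys must be strings; the in-memory parent class (changed further down) switches to an array key since it is never serialized. A rough illustration with hypothetical values:

  require "json"

  key = "a_subject" + '{"type":"string"}'          # string key round-trips through the file cache
  JSON.parse(JSON.generate(key => 42))             #=> {"a_subject{\"type\":\"string\"}"=>42}

  JSON.generate(["a_subject", '{"type":"string"}'] => 42)
  # an array key is stringified via #to_s on write, so it would not match a lookup after reload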
@@ -49,7 +59,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
 
  return schema unless schema.nil?
 
- hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
+ hash = read_from_disk_cache(@schemas_by_subject_version_path)
  if hash
  @schemas_by_subject_version = hash
  @schemas_by_subject_version[key]
@@ -63,7 +73,7 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
  # update instance var (in memory-cache) to match
  def store_by_version(subject, version, schema)
  key = "#{subject}#{version}"
- hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
+ hash = read_from_disk_cache(@schemas_by_subject_version_path)
  hash = if hash
  hash[key] = schema
  hash
@@ -77,6 +87,19 @@ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
  @schemas_by_subject_version[key]
  end
 
+ # Parse the file from disk, if it exists and is not zero length
+ private def read_from_disk_cache(path)
+ if File.exist?(path)
+ if File.size(path)!=0
+ return JSON.parse(File.read(path))
+ else
+ # just log a message if skipping zero length file
+ @logger.warn "skipping JSON.parse of zero length file at #{path}"
+ end
+ end
+ return nil
+ end
+
  private def write_to_disk_cache(path, hash)
  File.write(path, JSON.pretty_generate(hash))
  end
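The new helper treats an empty cache file as a cache miss instead of handing JSON.parse an empty string. A rough sketch of the difference, assuming the gem is already loaded and using a hypothetical cache directory:

  require "json"

  Dir.mkdir("cache") unless Dir.exist?("cache")    # hypothetical cache directory
  File.write("cache/schemas_by_id.json", "")       # zero length cache file

  JSON.parse("") rescue puts "JSON::ParserError"   # what loading such a file did in 1.3.0
  AvroTurf::DiskCache.new("cache")                 # 1.3.1: logs a warning and starts with an empty cache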
@@ -17,12 +17,12 @@ class AvroTurf::InMemoryCache
  end
 
  def lookup_by_schema(subject, schema)
- key = subject + schema.to_s
+ key = [subject, schema.to_s]
  @ids_by_schema[key]
  end
 
  def store_by_schema(subject, schema, id)
- key = subject + schema.to_s
+ key = [subject, schema.to_s]
  @ids_by_schema[key] = id
  end
 
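In the in-memory cache the key becomes a two-element array instead of a concatenated string; one practical effect is that distinct subject/schema pairs can no longer collapse onto the same key. A tiny illustration with hypothetical values:

  ["ab", "c"] == ["a", "bc"]   #=> false  (array keys keep the subject/schema boundary)
  "ab" + "c" == "a" + "bc"     #=> true   (concatenated keys can collide)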
@@ -124,7 +124,7 @@ class AvroTurf
  writer.write(message, encoder)
 
  stream.string
- rescue Excon::Error::NotFound
+ rescue Excon::Errors::NotFound
  if schema_id
  raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
  else
@@ -194,8 +194,10 @@ class AvroTurf
 
  # Fetch the schema from registry with the provided schema_id.
  def fetch_schema_by_id(schema_id)
- schema_json = @registry.fetch(schema_id)
- schema = Avro::Schema.parse(schema_json)
+ schema = @schemas_by_id.fetch(schema_id) do
+ schema_json = @registry.fetch(schema_id)
+ Avro::Schema.parse(schema_json)
+ end
  [schema, schema_id]
  end
 
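Hash#fetch with a block only evaluates the block when the key is missing, so the registry round-trip and Avro::Schema.parse now run only on a miss of the @schemas_by_id memo. A standalone sketch of the idiom (values hypothetical; note that fetch does not store the block's result back into the hash):

  schemas = { 42 => "already parsed schema" }      # stands in for @schemas_by_id
  schemas.fetch(42) { raise "not reached" }        #=> "already parsed schema"; block skipped
  schemas.fetch(7)  { "freshly fetched schema" }   #=> block result, returned but not stored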
@@ -22,7 +22,7 @@ class AvroTurf::SchemaStore
  # Still need to check is the schema already loaded
  return @schemas[fullname] if @schemas.key?(fullname)
 
- load_schema!(fullname, namespace)
+ load_schema!(fullname)
  end
  end
 
@@ -42,13 +42,12 @@ class AvroTurf::SchemaStore
  end
  end
 
- private
+ protected
 
  # Loads single schema
  # Such method is not thread-safe, do not call it of from mutex synchronization routine
- def load_schema!(fullname, namespace = nil, local_schemas_cache = {})
- *namespace, schema_name = fullname.split(".")
- schema_path = File.join(@path, *namespace, schema_name + ".avsc")
+ def load_schema!(fullname, local_schemas_cache = {})
+ schema_path = build_schema_path(fullname)
  schema_json = JSON.parse(File.read(schema_path))
 
  schema = Avro::Schema.real_parse(schema_json, local_schemas_cache)
@@ -78,17 +77,22 @@ class AvroTurf::SchemaStore
  # Try to first resolve a referenced schema from disk.
  # If this is successful, the Avro gem will have mutated the
  # local_schemas_cache, adding all the new schemas it found.
- load_schema!($1, nil, local_schemas_cache)
+ load_schema!($1, local_schemas_cache)
 
  # Attempt to re-parse the original schema now that the dependency
  # has been resolved and use the now-updated local_schemas_cache to
  # pick up where we left off.
  local_schemas_cache.delete(fullname)
- load_schema!(fullname, nil, local_schemas_cache)
+ load_schema!(fullname, local_schemas_cache)
  else
  raise
  end
  rescue Errno::ENOENT, Errno::ENAMETOOLONG
  raise AvroTurf::SchemaNotFoundError, "could not find Avro schema at `#{schema_path}'"
  end
+
+ def build_schema_path(fullname)
+ *namespace, schema_name = fullname.split(".")
+ schema_path = File.join(@path, *namespace, schema_name + ".avsc")
+ end
  end
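The namespace-to-path logic now lives in its own build_schema_path helper, which a subclass could override for a different on-disk layout. A quick sketch of what it computes, assuming a store rooted at "./schemas" (hypothetical path) and the fullname "com.example.person":

  *namespace, schema_name = "com.example.person".split(".")   # ["com", "example"], "person"
  File.join("./schemas", *namespace, schema_name + ".avsc")
  #=> "./schemas/com/example/person.avsc"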
@@ -1,3 +1,3 @@
  class AvroTurf
- VERSION = "1.3.0"
+ VERSION = "1.3.1"
  end
@@ -4,7 +4,8 @@ require 'avro_turf/test/fake_confluent_schema_registry_server'
 
  describe AvroTurf::CachedConfluentSchemaRegistry do
  let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
- let(:cache) { AvroTurf::DiskCache.new("spec/cache")}
+ let(:logger_io) { StringIO.new }
+ let(:cache) { AvroTurf::DiskCache.new("spec/cache", logger: Logger.new(logger_io))}
  let(:registry) { described_class.new(upstream, cache: cache) }
  let(:id) { rand(999) }
  let(:schema) do
@@ -80,6 +81,40 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
  end
  end
 
+ describe "#fetch (zero length cache file)" do
+ let(:cache_after) do
+ {
+ "#{id}" => "#{schema}"
+ }
+ end
+
+ before do
+ # setup the disk cache with a zero length file
+ File.write(File.join("spec/cache", "schemas_by_id.json"), '')
+ end
+
+ it "skips zero length disk cache" do
+ # multiple calls return same result, with only one upstream call
+ allow(upstream).to receive(:fetch).with(id).and_return(schema)
+ expect(registry.fetch(id)).to eq(schema)
+ expect(registry.fetch(id)).to eq(schema)
+ expect(upstream).to have_received(:fetch).exactly(1).times
+ expect(load_cache("schemas_by_id.json")).to eq cache_after
+ expect(logger_io.string).to include("zero length file at spec/cache/schemas_by_id.json")
+ end
+ end
+
+ describe "#fetch (corrupt cache file)" do
+ before do
+ # setup the disk cache with a corrupt file (i.e. not json)
+ File.write(File.join("spec/cache", "schemas_by_id.json"), 'NOTJSON')
+ end
+
+ it "raises error on corrupt cache file" do
+ expect{registry.fetch(id)}.to raise_error(JSON::ParserError, /unexpected token/)
+ end
+ end
+
  describe "#register" do
  let(:subject_name) { "a_subject" }
  let(:cache_before) do
@@ -120,6 +155,41 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
  end
  end
 
+ describe "#register (zero length cache file)" do
+ let(:subject_name) { "a_subject" }
+ let(:cache_after) do
+ {
+ "#{subject_name}#{schema}" => id
+ }
+ end
+
+ before do
+ # setup the disk cache with a zero length file
+ File.write(File.join("spec/cache", "ids_by_schema.json"), '')
+ end
+
+ it "skips zero length disk cache" do
+ # multiple calls return same result, with only one upstream call
+ allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
+ expect(registry.register(subject_name, schema)).to eq(id)
+ expect(registry.register(subject_name, schema)).to eq(id)
+ expect(upstream).to have_received(:register).exactly(1).times
+ expect(load_cache("ids_by_schema.json")).to eq cache_after
+ expect(logger_io.string).to include("zero length file at spec/cache/ids_by_schema.json")
+ end
+ end
+
+ describe "#register (corrupt cache file)" do
+ before do
+ # setup the disk cache with a corrupt file (i.e. not json)
+ File.write(File.join("spec/cache", "ids_by_schema.json"), 'NOTJSON')
+ end
+
+ it "raises error on corrupt cache file" do
+ expect{registry.register(subject_name, schema)}.to raise_error(JSON::ParserError, /unexpected token/)
+ end
+ end
+
  describe "#subject_version" do
  it "writes thru to disk cache" do
  # multiple calls return same result, with zero upstream calls
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: avro_turf
  version: !ruby/object:Gem::Version
- version: 1.3.0
+ version: 1.3.1
  platform: ruby
  authors:
  - Daniel Schierbeck
- autorequire:
+ autorequire:
  bindir: bin
  cert_chain: []
- date: 2020-09-10 00:00:00.000000000 Z
+ date: 2021-02-16 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: avro
@@ -156,14 +156,13 @@ dependencies:
  - - ">="
  - !ruby/object:Gem::Version
  version: '0'
- description:
+ description:
  email:
  - dasch@zendesk.com
  executables: []
  extensions: []
  extra_rdoc_files: []
  files:
- - ".circleci/config.yml"
  - ".github/workflows/ruby.yml"
  - ".github/workflows/stale.yml"
  - ".gitignore"
@@ -251,7 +250,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  version: '0'
  requirements: []
  rubygems_version: 3.1.2
- signing_key:
+ signing_key:
  specification_version: 4
  summary: A library that makes it easier to use the Avro serialization format from
  Ruby
data/.circleci/config.yml DELETED
@@ -1,36 +0,0 @@
- version: 2
- jobs:
- build:
- environment:
- CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
- CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
- docker:
- - image: circleci/ruby:2.6.2
- steps:
- - checkout
- - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
- - restore_cache:
- keys:
- # This branch if available
- - v1-dep-{{ .Branch }}-
- # Default branch if not
- - v1-dep-master-
- # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
- - v1-dep-
- - run: gem install bundler --no-document
- - run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
- # Save dependency cache
- - save_cache:
- key: v1-dep-{{ .Branch }}-{{ epoch }}
- paths:
- - vendor/bundle
- - ~/.bundle
- - run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
- - run:
- command: bundle exec rspec --color --require spec_helper --format progress
- - store_test_results:
- path: /tmp/circleci-test-results
- - store_artifacts:
- path: /tmp/circleci-artifacts
- - store_artifacts:
- path: /tmp/circleci-test-results