RubyGems - avro_turf - Versions diffs - 0.8.1 → 1.1.0 - Mend

avro_turf 0.8.1 → 1.1.0

Files changed (27) hide show

checksums.yaml +4 -4
data/.circleci/config.yml +36 -0
data/.github/workflows/ruby.yml +20 -0
data/.github/workflows/stale.yml +19 -0
data/CHANGELOG.md +29 -1
data/Gemfile +0 -3
data/README.md +62 -0
data/avro_turf.gemspec +7 -6
data/lib/avro_turf.rb +10 -1
data/lib/avro_turf/cached_confluent_schema_registry.rb +18 -6
data/lib/avro_turf/confluent_schema_registry.rb +23 -4
data/lib/avro_turf/disk_cache.rb +83 -0
data/lib/avro_turf/in_memory_cache.rb +38 -0
data/lib/avro_turf/messaging.rb +109 -16
data/lib/avro_turf/schema_store.rb +57 -23
data/lib/avro_turf/test/fake_confluent_schema_registry_server.rb +15 -3
data/lib/avro_turf/version.rb +1 -1
data/spec/cached_confluent_schema_registry_spec.rb +24 -2
data/spec/confluent_schema_registry_spec.rb +13 -1
data/spec/disk_cached_confluent_schema_registry_spec.rb +159 -0
data/spec/messaging_spec.rb +205 -17
data/spec/schema_store_spec.rb +134 -0
data/spec/spec_helper.rb +8 -0
data/spec/support/confluent_schema_registry_context.rb +8 -5
data/spec/test/fake_confluent_schema_registry_server_spec.rb +40 -0
metadata +37 -15
data/circle.yml +0 -4

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8e58564680b9399ae8df438412385f23bdabd46cee8deafc0dfa1c8b827d7792
-  data.tar.gz: 1df38f38434777fab06fddec69a8834442a4814e269edbed154fc74153f6b198
+  metadata.gz: a708d217a7af1550daca801dec09f63eee619c882bf099dc456ce300e2414aee
+  data.tar.gz: e4fe1fa9f4b8af78f36a2eb59caeb6946aa1ef3a1dc0db20d548d122c8ab7802
 SHA512:
-  metadata.gz: 6e47f299a673911614be989feefb56f2cd48be6a556e240919ad23b13b55928c3cd0837d5d5f43aa4c2f72e1f416465f811de24ae7e344f858dc147dd23be136
-  data.tar.gz: 187c4f087cf7ed656ef3bfed6bf0593938f57da698b0b99550e476a059e529cf355f33ce5bc1839680c1c0513c730e285a117f33ec9e6947c476f0301ab3c597
+  metadata.gz: 5476655050a88e2e38ccedaa9cf3228810802507fa8fb0c6b7b345b2da617577a8f6d015d3d5290630ebf07476fe1aa1b2c3eea3c7de7bcf6969ee511039eff6
+  data.tar.gz: 7a61be786f276dec7a62385413f89be39188670337d739e2eddfcfc2b2ed7cf8fa73c5148d06a4a4f113cfa7d8627885529d1900b58e7f5ba624b68a8137b1b9

data/.circleci/config.yml ADDED

@@ -0,0 +1,36 @@
+version: 2
+jobs:
+  build:
+    environment:
+      CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
+      CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
+    docker:
+    - image: circleci/ruby:2.6.2
+    steps:
+    - checkout
+    - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
+    - restore_cache:
+        keys:
+        # This branch if available
+        - v1-dep-{{ .Branch }}-
+        # Default branch if not
+        - v1-dep-master-
+        # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
+        - v1-dep-
+    - run: gem install bundler --no-document
+    - run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
+    # Save dependency cache
+    - save_cache:
+        key: v1-dep-{{ .Branch }}-{{ epoch }}
+        paths:
+        - vendor/bundle
+        - ~/.bundle
+    - run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
+    - run:
+        command: bundle exec rspec --color --require spec_helper --format progress
+    - store_test_results:
+        path: /tmp/circleci-test-results
+    - store_artifacts:
+        path: /tmp/circleci-artifacts
+    - store_artifacts:
+        path: /tmp/circleci-test-results

data/.github/workflows/ruby.yml ADDED

@@ -0,0 +1,20 @@
+name: Ruby
+on: [push, pull_request]
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v1
+    - name: Set up Ruby 2.6
+      uses: actions/setup-ruby@v1
+      with:
+        ruby-version: 2.6.x
+    - name: Build and test with RSpec
+      run: |
+        gem install bundler
+        bundle install --jobs 4 --retry 3
+        bundle exec rspec

data/.github/workflows/stale.yml ADDED

@@ -0,0 +1,19 @@
+name: Mark stale issues and pull requests
+on:
+  schedule:
+  - cron: "0 0 * * *"
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/stale@v1
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        stale-issue-message: 'Stale issue message'
+        stale-pr-message: 'Stale pull request message'
+        stale-issue-label: 'no-issue-activity'
+        stale-pr-label: 'no-pr-activity'

data/CHANGELOG.md CHANGED

@@ -1,4 +1,32 @@
-# avro_turf
+# AvroTurf
+## Unreleased
+## v1.1.0
+- Compatibility with Avro v1.10.x.
+## v1.0.0
+- Stop caching nested sub-schemas (#111)
+## v0.11.0
+- Add proxy support (#107)
+- Adding support for client certs (#109)
+## v0.10.0
+- Add more disk caching (#103)
+- Include schema information when decoding (#100, #101, #104)
+## v0.9.0
+- Compatibility with Avro v1.9.0 (#94)
+- Disable the auto registration of schema (#95)
+- abstracted caching from CachedConfluentSchemaRegistry (#74)
+- Load avro-patches if installed to silence deprecation errors (#85)
+- Make the schema store thread safe (#92)
 ## v0.8.1

data/Gemfile CHANGED

@@ -2,6 +2,3 @@ source 'https://rubygems.org'
 # Specify your gem's dependencies in avro_turf.gemspec
 gemspec
-# Used by CircleCI to format RSpec results.
-gem 'rspec_junit_formatter', :git => 'git@github.com:circleci/rspec_junit_formatter.git'

data/README.md CHANGED

@@ -16,6 +16,48 @@ These classes have been renamed to `AvroTurf::ConfluentSchemaRegistry`,
 The aliases for the original names will be removed in a future release.
+## Note about finding nested schemas
+As of AvroTurf version 1.0.0, only top-level schemas that have their own .avsc file will be loaded and resolvable by the `AvroTurf::SchemaStore#find` method. This change will likely not affect most users. However, if you use `AvroTurf::SchemaStore#load_schemas!` to pre-cache all your schemas and then rely on `AvroTurf::SchemaStore#find` to access nested schemas that are not defined by their own .avsc files, your code may stop working when you upgrade to v1.0.0.
+As an example, if you have a `person` schema (defined in `my/schemas/contacts/person.avsc`) that defines a nested `address` schema like this:
+```json
+{
+  "name": "person",
+  "namespace": "contacts",
+  "type": "record",
+  "fields": [
+    {
+      "name": "address",
+      "type": {
+        "name": "address",
+        "type": "record",
+        "fields": [
+          { "name": "addr1", "type": "string" },
+          { "name": "addr2", "type": "string" },
+          { "name": "city", "type": "string" },
+          { "name": "zip", "type": "string" }
+        ]
+      }
+    }
+  ]
+}
+```
+...this will no longer work in v1.0.0:
+```ruby
+store = AvroTurf::SchemaStore.new(path: 'my/schemas')
+store.load_schemas!
+# Accessing 'person' is correct and works fine.
+person = store.find('person', 'contacts') # my/schemas/contacts/person.avsc exists
+# Trying to access 'address' raises AvroTurf::SchemaNotFoundError
+address = store.find('address', 'contacts') # my/schemas/contacts/address.avsc is not found
+```
+For details and context, see [this pull request](https://github.com/dasch/avro_turf/pull/111).
 ## Installation
 Add this line to your application's Gemfile:
@@ -124,9 +166,29 @@ avro = AvroTurf::Messaging.new(registry_url: "http://my-registry:8081/")
 # time a schema is used.
 data = avro.encode({ "title" => "hello, world" }, schema_name: "greeting")
+# If you don't want to automatically register new schemas, you can explicitly pass
+# subject and version to specify which schema should be used for encoding.
+# It will fetch that schema from the registry and cache it. Subsequent instances
+# of the same schema version will be served by the cache.
+data = avro.encode({ "title" => "hello, world" }, subject: 'greeting', version: 1)
+# You can also explicitly pass schema_id to specify which schema
+# should be used for encoding.
+# It will fetch that schema from the registry and cache it. Subsequent instances
+# of the same schema version will be served by the cache.
+data = avro.encode({ "title" => "hello, world" }, schema_id: 2)
 # When decoding, the schema will be fetched from the registry and cached. Subsequent
 # instances of the same schema id will be served by the cache.
 avro.decode(data) #=> { "title" => "hello, world" }
+# If you want to get the decoded message as well as the schema used to encode the message,
+# you can use the `#decode_message` method.
+result = avro.decode_message(data)
+result.message       #=> { "title" => "hello, world" }
+result.schema_id     #=> 3
+result.writer_schema #=> #<Avro::Schema: ...>
+result.reader_schema #=> nil
 ```
 ### Confluent Schema Registry Client

data/avro_turf.gemspec CHANGED

@@ -17,16 +17,17 @@ Gem::Specification.new do |spec|
   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
   spec.require_paths = ["lib"]
-  spec.add_dependency "avro", ">= 1.7.7", "< 1.9"
-  spec.add_dependency "excon", "~> 0.45"
+  spec.add_dependency "avro", ">= 1.7.7", "< 1.11"
+  spec.add_dependency "excon", "~> 0.71"
-  spec.add_development_dependency "bundler", "~> 1.7"
-  spec.add_development_dependency "rake", "~> 10.0"
-  spec.add_development_dependency "rspec", "~> 3.2.0"
-  spec.add_development_dependency "fakefs", "~> 0.6.7"
+  spec.add_development_dependency "bundler", "~> 2.0"
+  spec.add_development_dependency "rake", "~> 13.0"
+  spec.add_development_dependency "rspec", "~> 3.2"
+  spec.add_development_dependency "fakefs", "~> 0.20.0"
   spec.add_development_dependency "webmock"
   spec.add_development_dependency "sinatra"
   spec.add_development_dependency "json_spec"
+  spec.add_development_dependency "rack-test"
   spec.post_install_message = %{
 avro_turf v0.8.0 deprecates the names AvroTurf::SchemaRegistry,

data/lib/avro_turf.rb CHANGED

@@ -1,9 +1,18 @@
+begin
+  require 'avro-patches'
+rescue LoadError
+  false
+end
 require 'avro_turf/version'
 require 'avro'
 require 'json'
 require 'avro_turf/schema_store'
 require 'avro_turf/core_ext'
-require 'avro_turf/schema_to_avro_patch'
+# check for something that indicates Avro v1.9.0 or later
+unless defined?(::Avro::LogicalTypes)
+  require 'avro_turf/schema_to_avro_patch'
+end
 class AvroTurf
   class Error < StandardError; end

data/lib/avro_turf/cached_confluent_schema_registry.rb CHANGED

@@ -1,16 +1,23 @@
 require 'avro_turf/confluent_schema_registry'
+require 'avro_turf/in_memory_cache'
+require 'avro_turf/disk_cache'
 # Caches registrations and lookups to the schema registry in memory.
 class AvroTurf::CachedConfluentSchemaRegistry
-  def initialize(upstream)
+  # Instantiate a new CachedConfluentSchemaRegistry instance with the given configuration.
+  # By default, uses a provided InMemoryCache to prevent repeated calls to the upstream registry.
+  #
+  # upstream  - The upstream schema registry object that fully responds to all methods in the
+  #             AvroTurf::ConfluentSchemaRegistry interface.
+  # cache     - Optional user provided Cache object that responds to all methods in the AvroTurf::InMemoryCache interface.
+  def initialize(upstream, cache: nil)
     @upstream = upstream
-    @schemas_by_id = {}
-    @ids_by_schema = {}
+    @cache = cache || AvroTurf::InMemoryCache.new()
   end
   # Delegate the following methods to the upstream
-  %i(subjects subject_versions subject_version check compatible?
+  %i(subjects subject_versions check compatible?
      global_config update_global_config subject_config update_subject_config).each do |name|
     define_method(name) do |*args|
       instance_variable_get(:@upstream).send(name, *args)
@@ -18,10 +25,15 @@ class AvroTurf::CachedConfluentSchemaRegistry
   end
   def fetch(id)
-    @schemas_by_id[id] ||= @upstream.fetch(id)
+    @cache.lookup_by_id(id) || @cache.store_by_id(id, @upstream.fetch(id))
   end
   def register(subject, schema)
-    @ids_by_schema[subject + schema.to_s] ||= @upstream.register(subject, schema)
+    @cache.lookup_by_schema(subject, schema) || @cache.store_by_schema(subject, schema, @upstream.register(subject, schema))
+  end
+  def subject_version(subject, version = 'latest')
+    @cache.lookup_by_version(subject, version) ||
+      @cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
   end
 end

data/lib/avro_turf/confluent_schema_registry.rb CHANGED

@@ -3,11 +3,30 @@ require 'excon'
 class AvroTurf::ConfluentSchemaRegistry
   CONTENT_TYPE = "application/vnd.schemaregistry.v1+json".freeze
-  def initialize(url, logger: Logger.new($stdout))
+  def initialize(
+    url,
+    logger: Logger.new($stdout),
+    proxy: nil,
+    client_cert: nil,
+    client_key: nil,
+    client_key_pass: nil,
+    client_cert_data: nil,
+    client_key_data: nil
+  )
     @logger = logger
-    @connection = Excon.new(url, headers: {
-      "Content-Type" => CONTENT_TYPE,
-    })
+    headers = {
+      "Content-Type" => CONTENT_TYPE
+    }
+    headers[:proxy] = proxy if proxy&.present?
+    @connection = Excon.new(
+      url,
+      headers: headers,
+      client_cert: client_cert,
+      client_key: client_key,
+      client_key_pass: client_key_pass,
+      client_cert_data: client_cert_data,
+      client_key_data: client_key_data
+    )
   end
   def fetch(id)

data/lib/avro_turf/disk_cache.rb ADDED

@@ -0,0 +1,83 @@
+# A cache for the CachedConfluentSchemaRegistry.
+# Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
+class AvroTurf::DiskCache < AvroTurf::InMemoryCache
+  def initialize(disk_path)
+    super()
+    # load the write-thru cache on startup, if it exists
+    @schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
+    @schemas_by_id = JSON.parse(File.read(@schemas_by_id_path)) if File.exist?(@schemas_by_id_path)
+    @ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
+    @ids_by_schema = JSON.parse(File.read(@ids_by_schema_path)) if File.exist?(@ids_by_schema_path)
+    @schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
+    @schemas_by_subject_version = {}
+  end
+  # override
+  # the write-thru cache (json) does not store keys in numeric format
+  # so, convert id to a string for caching purposes
+  def lookup_by_id(id)
+    super(id.to_s)
+  end
+  # override to include write-thru cache after storing result from upstream
+  def store_by_id(id, schema)
+    # must return the value from storing the result (i.e. do not return result from file write)
+    value = super(id.to_s, schema)
+    File.write(@schemas_by_id_path, JSON.pretty_generate(@schemas_by_id))
+    return value
+  end
+  # override to include write-thru cache after storing result from upstream
+  def store_by_schema(subject, schema, id)
+    # must return the value from storing the result (i.e. do not return result from file write)
+    value = super
+    File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
+    return value
+  end
+  # checks instance var (in-memory cache) for schema
+  # checks disk cache if the schema is not in the in-memory cache
+  # if file exists but no in-memory cache, read from file and sync in-memory cache
+  # finally, if file doesn't exist return nil
+  def lookup_by_version(subject, version)
+    key = "#{subject}#{version}"
+    schema = @schemas_by_subject_version[key]
+    return schema unless schema.nil?
+    hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
+    if hash
+      @schemas_by_subject_version = hash
+      @schemas_by_subject_version[key]
+    end
+  end
+  # check if file exists and parse json into a hash
+  # if file exists take json and overwrite/insert schema at key
+  # if file doesn't exist create new hash
+  # write the new/updated hash to file
+  # update instance var (in-memory cache) to match
+  def store_by_version(subject, version, schema)
+    key = "#{subject}#{version}"
+    hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
+    hash = if hash
+             hash[key] = schema
+             hash
+           else
+             {key => schema}
+           end
+    write_to_disk_cache(@schemas_by_subject_version_path, hash)
+    @schemas_by_subject_version = hash
+    @schemas_by_subject_version[key]
+  end
+  private def write_to_disk_cache(path, hash)
+    File.write(path, JSON.pretty_generate(hash))
+  end
+end

data/lib/avro_turf/in_memory_cache.rb ADDED

@@ -0,0 +1,38 @@
+# A cache for the CachedConfluentSchemaRegistry.
+# Simply stores the schemas and ids in in-memory hashes.
+class AvroTurf::InMemoryCache
+  def initialize
+    @schemas_by_id = {}
+    @ids_by_schema = {}
+    @schema_by_subject_version = {}
+  end
+  def lookup_by_id(id)
+    @schemas_by_id[id]
+  end
+  def store_by_id(id, schema)
+    @schemas_by_id[id] = schema
+  end
+  def lookup_by_schema(subject, schema)
+    key = subject + schema.to_s
+    @ids_by_schema[key]
+  end
+  def store_by_schema(subject, schema, id)
+    key = subject + schema.to_s
+    @ids_by_schema[key] = id
+  end
+  def lookup_by_version(subject, version)
+    key = "#{subject}#{version}"
+    @schema_by_subject_version[key]
+  end
+  def store_by_version(subject, version, schema)
+    key = "#{subject}#{version}"
+    @schema_by_subject_version[key] = schema
+  end
+end