RubyGems - avro_turf - Versions diffs - 0.8.0 → 1.0.0 - Mend

avro_turf 0.8.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

checksums.yaml +5 -5
data/.circleci/config.yml +36 -0
data/.github/workflows/ruby.yml +20 -0
data/.github/workflows/stale.yml +19 -0
data/CHANGELOG.md +30 -1
data/Gemfile +0 -3
data/README.md +62 -0
data/avro_turf.gemspec +7 -6
data/lib/avro_turf.rb +14 -3
data/lib/avro_turf/cached_confluent_schema_registry.rb +18 -6
data/lib/avro_turf/confluent_schema_registry.rb +23 -4
data/lib/avro_turf/disk_cache.rb +83 -0
data/lib/avro_turf/in_memory_cache.rb +38 -0
data/lib/avro_turf/messaging.rb +109 -16
data/lib/avro_turf/mutable_schema_store.rb +18 -0
data/lib/avro_turf/schema_store.rb +58 -22
data/lib/avro_turf/test/fake_confluent_schema_registry_server.rb +15 -3
data/lib/avro_turf/version.rb +1 -1
data/spec/cached_confluent_schema_registry_spec.rb +24 -2
data/spec/confluent_schema_registry_spec.rb +13 -1
data/spec/disk_cached_confluent_schema_registry_spec.rb +159 -0
data/spec/messaging_spec.rb +205 -17
data/spec/schema_store_spec.rb +134 -0
data/spec/spec_helper.rb +8 -0
data/spec/support/confluent_schema_registry_context.rb +8 -5
data/spec/test/fake_confluent_schema_registry_server_spec.rb +40 -0
metadata +39 -16
data/circle.yml +0 -4

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: a708b9aabeca7d45e1db532e180b2d80e4a5aecb
-  data.tar.gz: acb21f2435fd5126efed47803395d84eb4f5a220
+SHA256:
+  metadata.gz: 692b0814cd5b8fcaaf55e6d607687714ae9bc496461fe0b838253d5fbeb2d218
+  data.tar.gz: 506a5d9bbe91a9386b1b92eb5c7e0366ad06856615442424725a82d4834cc6a6
 SHA512:
-  metadata.gz: 4bcc5e9832804eafb4f295a6fc85273a5a52c17a515f87ab3da0deb31034ed428c6aaee3754abcaa45d78d37daf6062c6c1d1007644f8578e2561b17f70f1614
-  data.tar.gz: 89071ed406d0be937344cc70814499e29206f05e52484fd4ba4f797f12bcb9236c36c4b080f22ffedd815ac3f6081e13405cb10fa1dc4184cfe73c8bbb279952
+  metadata.gz: 0db4e3e78577224cdb07ca4747c759bcdaae620dc03bbcac03194941a3bdf52bc69871cfad88dc14fc8ee6fd798a34d5acab289f8c841a97581290f258213f06
+  data.tar.gz: 672aec874705185e68dce6c8806ccf93b18ec8c3d0f34eda074e8fca6c372d4761d682ff316d3ba3d08c4b69b61a853bdb3c31ff68d4da2f0e37463a407ca656

data/.circleci/config.yml ADDED

@@ -0,0 +1,36 @@
+version: 2
+jobs:
+  build:
+    environment:
+      CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
+      CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
+    docker:
+    - image: circleci/ruby:2.6.2
+    steps:
+    - checkout
+    - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
+    - restore_cache:
+        keys:
+        # This branch if available
+        - v1-dep-{{ .Branch }}-
+        # Default branch if not
+        - v1-dep-master-
+        # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
+        - v1-dep-
+    - run: gem install bundler --no-document
+    - run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
+    # Save dependency cache
+    - save_cache:
+        key: v1-dep-{{ .Branch }}-{{ epoch }}
+        paths:
+        - vendor/bundle
+        - ~/.bundle
+    - run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
+    - run:
+        command: bundle exec rspec --color --require spec_helper --format progress
+    - store_test_results:
+        path: /tmp/circleci-test-results
+    - store_artifacts:
+        path: /tmp/circleci-artifacts
+    - store_artifacts:
+        path: /tmp/circleci-test-results

data/.github/workflows/ruby.yml ADDED

@@ -0,0 +1,20 @@
+name: Ruby
+on: [push, pull_request]
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v1
+    - name: Set up Ruby 2.6
+      uses: actions/setup-ruby@v1
+      with:
+        ruby-version: 2.6.x
+    - name: Build and test with RSpec
+      run: |
+        gem install bundler
+        bundle install --jobs 4 --retry 3
+        bundle exec rspec

data/.github/workflows/stale.yml ADDED

@@ -0,0 +1,19 @@
+name: Mark stale issues and pull requests
+on:
+  schedule:
+  - cron: "0 0 * * *"
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/stale@v1
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        stale-issue-message: 'Stale issue message'
+        stale-pr-message: 'Stale pull request message'
+        stale-issue-label: 'no-issue-activity'
+        stale-pr-label: 'no-pr-activity'

data/CHANGELOG.md CHANGED

@@ -1,6 +1,35 @@
-# avro_turf
+# AvroTurf
+## Unreleased
+## v1.0.0
+- Stop caching nested sub-schemas (#111)
+## v0.11.0
+- Add proxy support (#107)
+- Adding support for client certs (#109)
+## v0.10.0
+- Add more disk caching (#103)
+- Include schema information when decoding (#100, #101, #104)
+## v0.9.0
+- Compatibility with Avro v1.9.0 (#94)
+- Disable the auto registration of schema (#95)
+- abstracted caching from CachedConfluentSchemaRegistry (#74)
+- Load avro-patches if installed to silence deprecation errors (#85)
+- Make schema store to be thread safe (#92)
+## v0.8.1
+- Allow accessing schema store from outside AvroTurf (#68).
 ## v0.8.0
 - The names `AvroTurf::SchemaRegistry`, `AvroTurf::CachedSchemaRegistry`, and
   `FakeSchemaRegistryServer` are deprecated and will be removed in a future release.
   Use `AvroTurf::ConfluentSchemaRegistry`, `AvroTurf::CachedConfluentSchemaRegistry`,

data/Gemfile CHANGED

@@ -2,6 +2,3 @@ source 'https://rubygems.org'
 # Specify your gem's dependencies in avro_turf.gemspec
 gemspec
-# Used by CircleCI to format RSpec results.
-gem 'rspec_junit_formatter', :git => 'git@github.com:circleci/rspec_junit_formatter.git'

data/README.md CHANGED

@@ -16,6 +16,48 @@ These classes have been renamed to `AvroTurf::ConfluentSchemaRegistry`,
 The aliases for the original names will be removed in a future release.
+## Note about finding nested schemas
+As of AvroTurf version 1.0.0, only top-level schemas that have their own .avsc file will be loaded and resolvable by the `AvroTurf::SchemaStore#find` method. This change will likely not affect most users. However, if you use `AvroTurf::SchemaStore#load_schemas!` to pre-cache all your schemas and then rely on `AvroTurf::SchemaStore#find` to access nested schemas that are not defined by their own .avsc files, your code may stop working when you upgrade to v1.0.0.
+As an example, if you have a `person` schema (defined in `my/schemas/contacts/person.avsc`) that defines a nested `address` schema like this:
+```json
+{
+  "name": "person",
+  "namespace": "contacts",
+  "type": "record",
+  "fields": [
+    {
+      "name": "address",
+      "type": {
+        "name": "address",
+        "type": "record",
+        "fields": [
+          { "name": "addr1", "type": "string" },
+          { "name": "addr2", "type": "string" },
+          { "name": "city", "type": "string" },
+          { "name": "zip", "type": "string" }
+        ]
+      }
+    }
+  ]
+}
+```
+...this will no longer work in v1.0.0:
+```ruby
+store = AvroTurf::SchemaStore.new(path: 'my/schemas')
+store.load_schemas!
+# Accessing 'person' is correct and works fine.
+person = store.find('person', 'contacts') # my/schemas/contacts/person.avsc exists
+# Trying to access 'address' raises AvroTurf::SchemaNotFoundError
+address = store.find('address', 'contacts') # my/schemas/contacts/address.avsc is not found
+```
+For details and context, see [this pull request](https://github.com/dasch/avro_turf/pull/111).
 ## Installation
 Add this line to your application's Gemfile:
@@ -124,9 +166,29 @@ avro = AvroTurf::Messaging.new(registry_url: "http://my-registry:8081/")
 # time a schema is used.
 data = avro.encode({ "title" => "hello, world" }, schema_name: "greeting")
+# If you don't want to automatically register new schemas, you can explicitly pass
+# a subject and version to specify which schema should be used for encoding.
+# It will fetch that schema from the registry and cache it. Subsequent instances
+# of the same schema version will be served by the cache.
+data = avro.encode({ "title" => "hello, world" }, subject: 'greeting', version: 1)
+# You can also explicitly pass a schema_id to specify which schema
+# should be used for encoding.
+# It will fetch that schema from the registry and cache it. Subsequent instances
+# of the same schema version will be served by the cache.
+data = avro.encode({ "title" => "hello, world" }, schema_id: 2)
 # When decoding, the schema will be fetched from the registry and cached. Subsequent
 # instances of the same schema id will be served by the cache.
 avro.decode(data) #=> { "title" => "hello, world" }
+# If you want to get the decoded message as well as the schema used to encode it,
+# you can use the `#decode_message` method.
+result = avro.decode_message(data)
+result.message       #=> { "title" => "hello, world" }
+result.schema_id     #=> 3
+result.writer_schema #=> #<Avro::Schema: ...>
+result.reader_schema #=> nil
 ```
 ### Confluent Schema Registry Client

data/avro_turf.gemspec CHANGED

@@ -17,16 +17,17 @@ Gem::Specification.new do |spec|
   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
   spec.require_paths = ["lib"]
-  spec.add_dependency "avro", ">= 1.7.7", "< 1.9"
-  spec.add_dependency "excon", "~> 0.45"
+  spec.add_dependency "avro", ">= 1.7.7", "< 1.10"
+  spec.add_dependency "excon", "~> 0.71"
-  spec.add_development_dependency "bundler", "~> 1.7"
-  spec.add_development_dependency "rake", "~> 10.0"
-  spec.add_development_dependency "rspec", "~> 3.2.0"
-  spec.add_development_dependency "fakefs", "~> 0.6.7"
+  spec.add_development_dependency "bundler", "~> 2.0"
+  spec.add_development_dependency "rake", "~> 13.0"
+  spec.add_development_dependency "rspec", "~> 3.2"
+  spec.add_development_dependency "fakefs", "~> 0.20.0"
   spec.add_development_dependency "webmock"
   spec.add_development_dependency "sinatra"
   spec.add_development_dependency "json_spec"
+  spec.add_development_dependency "rack-test"
   spec.post_install_message = %{
 avro_turf v0.8.0 deprecates the names AvroTurf::SchemaRegistry,

data/lib/avro_turf.rb CHANGED

@@ -1,9 +1,18 @@
+begin
+  require 'avro-patches'
+rescue LoadError
+  false
+end
 require 'avro_turf/version'
 require 'avro'
 require 'json'
 require 'avro_turf/schema_store'
 require 'avro_turf/core_ext'
-require 'avro_turf/schema_to_avro_patch'
+# check for something that indicates Avro v1.9.0 or later
+unless defined?(::Avro::LogicalTypes)
+  require 'avro_turf/schema_to_avro_patch'
+end
 class AvroTurf
   class Error < StandardError; end
@@ -15,13 +24,15 @@ class AvroTurf
   # Create a new AvroTurf instance with the specified configuration.
   #
   # schemas_path - The String path to the root directory containing Avro schemas (default: "./schemas").
+  # schema_store - A schema store object that responds to #find(schema_name, namespace).
   # namespace    - The String namespace that should be used to qualify schema names (optional).
   # codec        - The String name of a codec that should be used to compress messages (optional).
   #
   # Currently, the only valid codec name is `deflate`.
-  def initialize(schemas_path: nil, namespace: nil, codec: nil)
+  def initialize(schemas_path: nil, schema_store: nil, namespace: nil, codec: nil)
     @namespace = namespace
-    @schema_store = SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
+    @schema_store = schema_store ||
+      SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
     @codec = codec
   end

data/lib/avro_turf/cached_confluent_schema_registry.rb CHANGED

@@ -1,16 +1,23 @@
 require 'avro_turf/confluent_schema_registry'
+require 'avro_turf/in_memory_cache'
+require 'avro_turf/disk_cache'
 # Caches registrations and lookups to the schema registry in memory.
 class AvroTurf::CachedConfluentSchemaRegistry
-  def initialize(upstream)
+  # Instantiate a new CachedConfluentSchemaRegistry instance with the given configuration.
+  # By default, uses a provided InMemoryCache to prevent repeated calls to the upstream registry.
+  #
+  # upstream  - The upstream schema registry object that fully responds to all methods in the
+  #             AvroTurf::ConfluentSchemaRegistry interface.
+  # cache     - Optional user provided Cache object that responds to all methods in the AvroTurf::InMemoryCache interface.
+  def initialize(upstream, cache: nil)
     @upstream = upstream
-    @schemas_by_id = {}
-    @ids_by_schema = {}
+    @cache = cache || AvroTurf::InMemoryCache.new()
   end
   # Delegate the following methods to the upstream
-  %i(subjects subject_versions subject_version check compatible?
+  %i(subjects subject_versions check compatible?
      global_config update_global_config subject_config update_subject_config).each do |name|
     define_method(name) do |*args|
       instance_variable_get(:@upstream).send(name, *args)
@@ -18,10 +25,15 @@ class AvroTurf::CachedConfluentSchemaRegistry
   end
   def fetch(id)
-    @schemas_by_id[id] ||= @upstream.fetch(id)
+    @cache.lookup_by_id(id) || @cache.store_by_id(id, @upstream.fetch(id))
   end
   def register(subject, schema)
-    @ids_by_schema[subject + schema.to_s] ||= @upstream.register(subject, schema)
+    @cache.lookup_by_schema(subject, schema) || @cache.store_by_schema(subject, schema, @upstream.register(subject, schema))
+  end
+  def subject_version(subject, version = 'latest')
+    @cache.lookup_by_version(subject, version) ||
+      @cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
   end
 end

data/lib/avro_turf/confluent_schema_registry.rb CHANGED

@@ -3,11 +3,30 @@ require 'excon'
 class AvroTurf::ConfluentSchemaRegistry
   CONTENT_TYPE = "application/vnd.schemaregistry.v1+json".freeze
-  def initialize(url, logger: Logger.new($stdout))
+  def initialize(
+    url,
+    logger: Logger.new($stdout),
+    proxy: nil,
+    client_cert: nil,
+    client_key: nil,
+    client_key_pass: nil,
+    client_cert_data: nil,
+    client_key_data: nil
+  )
     @logger = logger
-    @connection = Excon.new(url, headers: {
-      "Content-Type" => CONTENT_TYPE,
-    })
+    headers = {
+      "Content-Type" => CONTENT_TYPE
+    }
+    headers[:proxy] = proxy if proxy&.present?
+    @connection = Excon.new(
+      url,
+      headers: headers,
+      client_cert: client_cert,
+      client_key: client_key,
+      client_key_pass: client_key_pass,
+      client_cert_data: client_cert_data,
+      client_key_data: client_key_data
+    )
   end
   def fetch(id)

data/lib/avro_turf/disk_cache.rb ADDED

@@ -0,0 +1,83 @@
+# A cache for the CachedConfluentSchemaRegistry.
+# Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
+class AvroTurf::DiskCache < AvroTurf::InMemoryCache
+  def initialize(disk_path)
+    super()
+    # load the write-thru cache on startup, if it exists
+    @schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
+    @schemas_by_id = JSON.parse(File.read(@schemas_by_id_path)) if File.exist?(@schemas_by_id_path)
+    @ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
+    @ids_by_schema = JSON.parse(File.read(@ids_by_schema_path)) if File.exist?(@ids_by_schema_path)
+    @schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
+    @schemas_by_subject_version = {}
+  end
+  # override
+  # the write-thru cache (json) does not store keys in numeric format
+  # so, convert id to a string for caching purposes
+  def lookup_by_id(id)
+    super(id.to_s)
+  end
+  # override to include write-thru cache after storing result from upstream
+  def store_by_id(id, schema)
+    # must return the value from storing the result (i.e. do not return result from file write)
+    value = super(id.to_s, schema)
+    File.write(@schemas_by_id_path, JSON.pretty_generate(@schemas_by_id))
+    return value
+  end
+  # override to include write-thru cache after storing result from upstream
+  def store_by_schema(subject, schema, id)
+    # must return the value from storing the result (i.e. do not return result from file write)
+    value = super
+    File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
+    return value
+  end
+  # checks instance var (in-memory cache) for schema
+  # checks disk cache if in-memory cache doesn't exist
+  # if file exists but no in-memory cache, read from file and sync in-memory cache
+  # finally, if file doesn't exist return nil
+  def lookup_by_version(subject, version)
+    key = "#{subject}#{version}"
+    schema = @schemas_by_subject_version[key]
+    return schema unless schema.nil?
+    hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
+    if hash
+      @schemas_by_subject_version = hash
+      @schemas_by_subject_version[key]
+    end
+  end
+  # check if file exists and parse json into a hash
+  # if file exists take json and overwrite/insert schema at key
+  # if file doesn't exist create new hash
+  # write the new/updated hash to file
+  # update instance var (in memory-cache) to match
+  def store_by_version(subject, version, schema)
+    key = "#{subject}#{version}"
+    hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
+    hash = if hash
+             hash[key] = schema
+             hash
+           else
+             {key => schema}
+           end
+    write_to_disk_cache(@schemas_by_subject_version_path, hash)
+    @schemas_by_subject_version = hash
+    @schemas_by_subject_version[key]
+  end
+  private def write_to_disk_cache(path, hash)
+    File.write(path, JSON.pretty_generate(hash))
+  end
+end

data/lib/avro_turf/in_memory_cache.rb ADDED

@@ -0,0 +1,38 @@
+# A cache for the CachedConfluentSchemaRegistry.
+# Simply stores the schemas and ids in in-memory hashes.
+class AvroTurf::InMemoryCache
+  def initialize
+    @schemas_by_id = {}
+    @ids_by_schema = {}
+    @schema_by_subject_version = {}
+  end
+  def lookup_by_id(id)
+    @schemas_by_id[id]
+  end
+  def store_by_id(id, schema)
+    @schemas_by_id[id] = schema
+  end
+  def lookup_by_schema(subject, schema)
+    key = subject + schema.to_s
+    @ids_by_schema[key]
+  end
+  def store_by_schema(subject, schema, id)
+    key = subject + schema.to_s
+    @ids_by_schema[key] = id
+  end
+  def lookup_by_version(subject, version)
+    key = "#{subject}#{version}"
+    @schema_by_subject_version[key]
+  end
+  def store_by_version(subject, version, schema)
+    key = "#{subject}#{version}"
+    @schema_by_subject_version[key] = schema
+  end
+end