RubyGems - avro_turf - Versions diffs - 0.8.1 → 0.11.0 - Mend

avro_turf 0.8.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +4 -4
data/.circleci/config.yml +36 -0
data/.github/workflows/ruby.yml +20 -0
data/CHANGELOG.md +20 -0
data/Gemfile +0 -3
data/README.md +20 -0
data/avro_turf.gemspec +6 -5
data/lib/avro_turf.rb +10 -1
data/lib/avro_turf/cached_confluent_schema_registry.rb +18 -6
data/lib/avro_turf/confluent_schema_registry.rb +23 -4
data/lib/avro_turf/disk_cache.rb +83 -0
data/lib/avro_turf/in_memory_cache.rb +38 -0
data/lib/avro_turf/messaging.rb +109 -16
data/lib/avro_turf/schema_store.rb +35 -20
data/lib/avro_turf/test/fake_confluent_schema_registry_server.rb +15 -3
data/lib/avro_turf/version.rb +1 -1
data/spec/cached_confluent_schema_registry_spec.rb +24 -2
data/spec/confluent_schema_registry_spec.rb +13 -1
data/spec/disk_cached_confluent_schema_registry_spec.rb +159 -0
data/spec/messaging_spec.rb +205 -17
data/spec/schema_store_spec.rb +36 -0
data/spec/spec_helper.rb +8 -0
data/spec/support/confluent_schema_registry_context.rb +8 -5
data/spec/test/fake_confluent_schema_registry_server_spec.rb +40 -0
metadata +34 -13
data/circle.yml +0 -4

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8e58564680b9399ae8df438412385f23bdabd46cee8deafc0dfa1c8b827d7792
-  data.tar.gz: 1df38f38434777fab06fddec69a8834442a4814e269edbed154fc74153f6b198
+  metadata.gz: 1e2ee4d9598bcaa2ec5738a1130fae0b19be7b5e7250f27540313b7471f97e23
+  data.tar.gz: 0b441cb30a153958c2ea283300a1a05c26218e2a97cb807fce36f8ad9d0240da
 SHA512:
-  metadata.gz: 6e47f299a673911614be989feefb56f2cd48be6a556e240919ad23b13b55928c3cd0837d5d5f43aa4c2f72e1f416465f811de24ae7e344f858dc147dd23be136
-  data.tar.gz: 187c4f087cf7ed656ef3bfed6bf0593938f57da698b0b99550e476a059e529cf355f33ce5bc1839680c1c0513c730e285a117f33ec9e6947c476f0301ab3c597
+  metadata.gz: 12779eac5c325752cfa1be34da94ef5f332490cda4ff0aef29529a00557008cecf39592396a3f3525a2a12cb67a46744800781e0da69d4bf02511f5a2284e5e7
+  data.tar.gz: a2e4c84fb338d62296aefb8ae8c206c262d756475ec11bd9cead17bfe6015ea069ff36a891df3381385fa650cb4b5ea1584855fccc5294f5910ab34563ad973a

data/.circleci/config.yml ADDED

@@ -0,0 +1,36 @@
+version: 2
+jobs:
+  build:
+    environment:
+      CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
+      CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
+    docker:
+    - image: circleci/ruby:2.6.2
+    steps:
+    - checkout
+    - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
+    - restore_cache:
+        keys:
+        # This branch if available
+        - v1-dep-{{ .Branch }}-
+        # Default branch if not
+        - v1-dep-master-
+        # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
+        - v1-dep-
+    - run: gem install bundler --no-document
+    - run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
+    # Save dependency cache
+    - save_cache:
+        key: v1-dep-{{ .Branch }}-{{ epoch }}
+        paths:
+        - vendor/bundle
+        - ~/.bundle
+    - run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
+    - run:
+        command: bundle exec rspec --color --require spec_helper --format progress
+    - store_test_results:
+        path: /tmp/circleci-test-results
+    - store_artifacts:
+        path: /tmp/circleci-artifacts
+    - store_artifacts:
+        path: /tmp/circleci-test-results

data/.github/workflows/ruby.yml ADDED

@@ -0,0 +1,20 @@
+name: Ruby
+on: [push]
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v1
+    - name: Set up Ruby 2.6
+      uses: actions/setup-ruby@v1
+      with:
+        ruby-version: 2.6.x
+    - name: Build and test with RSpec
+      run: |
+        gem install bundler
+        bundle install --jobs 4 --retry 3
+        bundle exec rspec

data/CHANGELOG.md CHANGED

@@ -1,5 +1,25 @@
 # avro_turf
+## Unreleased
+## v0.11.0
+- Add proxy support (#107)
+- Adding support for client certs (#109)
+## v0.10.0
+- Add more disk caching (#103)
+- Include schema information when decoding (#100, #101, #104)
+## v0.9.0
+- Compatibility with Avro v1.9.0 (#94)
+- Disable the auto registration of schema (#95)
+- abstracted caching from CachedConfluentSchemaRegistry (#74)
+- Load avro-patches if installed to silence deprecation errors (#85)
+- Make schema store to be thread safe (#92)
 ## v0.8.1
 - Allow accessing schema store from outside AvroTurf (#68).

data/Gemfile CHANGED

@@ -2,6 +2,3 @@ source 'https://rubygems.org'
 # Specify your gem's dependencies in avro_turf.gemspec
 gemspec
-# Used by CircleCI to format RSpec results.
-gem 'rspec_junit_formatter', :git => 'git@github.com:circleci/rspec_junit_formatter.git'

data/README.md CHANGED

@@ -124,9 +124,29 @@ avro = AvroTurf::Messaging.new(registry_url: "http://my-registry:8081/")
 # time a schema is used.
 data = avro.encode({ "title" => "hello, world" }, schema_name: "greeting")
+# If you don't want to automatically register new schemas, you can explicitly pass
+# subject and version to specify which schema should be used for encoding.
+# It will fetch that schema from the registry and cache it. Subsequent instances
+# of the same schema version will be served by the cache.
+data = avro.encode({ "title" => "hello, world" }, subject: 'greeting', version: 1)
+# You can also explicitly pass schema_id to specify which schema
+# should be used for encoding.
+# It will fetch that schema from the registry and cache it. Subsequent instances
+# of the same schema version will be served by the cache.
+data = avro.encode({ "title" => "hello, world" }, schema_id: 2)
 # When decoding, the schema will be fetched from the registry and cached. Subsequent
 # instances of the same schema id will be served by the cache.
 avro.decode(data) #=> { "title" => "hello, world" }
+# If you want to get the decoded message as well as the schema used to encode the message,
+# you can use the `#decode_message` method.
+result = avro.decode_message(data)
+result.message       #=> { "title" => "hello, world" }
+result.schema_id     #=> 3
+result.writer_schema #=> #<Avro::Schema: ...>
+result.reader_schema #=> nil
 ```
 ### Confluent Schema Registry Client

data/avro_turf.gemspec CHANGED

@@ -17,16 +17,17 @@ Gem::Specification.new do |spec|
   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
   spec.require_paths = ["lib"]
-  spec.add_dependency "avro", ">= 1.7.7", "< 1.9"
+  spec.add_dependency "avro", ">= 1.7.7", "< 1.10"
   spec.add_dependency "excon", "~> 0.45"
-  spec.add_development_dependency "bundler", "~> 1.7"
-  spec.add_development_dependency "rake", "~> 10.0"
-  spec.add_development_dependency "rspec", "~> 3.2.0"
-  spec.add_development_dependency "fakefs", "~> 0.6.7"
+  spec.add_development_dependency "bundler", "~> 2.0"
+  spec.add_development_dependency "rake", "~> 13.0"
+  spec.add_development_dependency "rspec", "~> 3.2"
+  spec.add_development_dependency "fakefs", "~> 0.20.0"
   spec.add_development_dependency "webmock"
   spec.add_development_dependency "sinatra"
   spec.add_development_dependency "json_spec"
+  spec.add_development_dependency "rack-test"
   spec.post_install_message = %{
 avro_turf v0.8.0 deprecates the names AvroTurf::SchemaRegistry,

data/lib/avro_turf.rb CHANGED

@@ -1,9 +1,18 @@
+begin
+  require 'avro-patches'
+rescue LoadError
+  false
+end
 require 'avro_turf/version'
 require 'avro'
 require 'json'
 require 'avro_turf/schema_store'
 require 'avro_turf/core_ext'
-require 'avro_turf/schema_to_avro_patch'
+# check for something that indicates Avro v1.9.0 or later
+unless defined?(::Avro::LogicalTypes)
+  require 'avro_turf/schema_to_avro_patch'
+end
 class AvroTurf
   class Error < StandardError; end

data/lib/avro_turf/cached_confluent_schema_registry.rb CHANGED

@@ -1,16 +1,23 @@
 require 'avro_turf/confluent_schema_registry'
+require 'avro_turf/in_memory_cache'
+require 'avro_turf/disk_cache'
 # Caches registrations and lookups to the schema registry in memory.
 class AvroTurf::CachedConfluentSchemaRegistry
-  def initialize(upstream)
+  # Instantiate a new CachedConfluentSchemaRegistry instance with the given configuration.
+  # By default, uses a provided InMemoryCache to prevent repeated calls to the upstream registry.
+  #
+  # upstream  - The upstream schema registry object that fully responds to all methods in the
+  #             AvroTurf::ConfluentSchemaRegistry interface.
+  # cache     - Optional user provided Cache object that responds to all methods in the AvroTurf::InMemoryCache interface.
+  def initialize(upstream, cache: nil)
     @upstream = upstream
-    @schemas_by_id = {}
-    @ids_by_schema = {}
+    @cache = cache || AvroTurf::InMemoryCache.new()
   end
   # Delegate the following methods to the upstream
-  %i(subjects subject_versions subject_version check compatible?
+  %i(subjects subject_versions check compatible?
      global_config update_global_config subject_config update_subject_config).each do |name|
     define_method(name) do |*args|
       instance_variable_get(:@upstream).send(name, *args)
@@ -18,10 +25,15 @@ class AvroTurf::CachedConfluentSchemaRegistry
   end
   def fetch(id)
-    @schemas_by_id[id] ||= @upstream.fetch(id)
+    @cache.lookup_by_id(id) || @cache.store_by_id(id, @upstream.fetch(id))
   end
   def register(subject, schema)
-    @ids_by_schema[subject + schema.to_s] ||= @upstream.register(subject, schema)
+    @cache.lookup_by_schema(subject, schema) || @cache.store_by_schema(subject, schema, @upstream.register(subject, schema))
+  end
+  def subject_version(subject, version = 'latest')
+    @cache.lookup_by_version(subject, version) ||
+      @cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
   end
 end

data/lib/avro_turf/confluent_schema_registry.rb CHANGED

@@ -3,11 +3,30 @@ require 'excon'
 class AvroTurf::ConfluentSchemaRegistry
   CONTENT_TYPE = "application/vnd.schemaregistry.v1+json".freeze
-  def initialize(url, logger: Logger.new($stdout))
+  def initialize(
+    url,
+    logger: Logger.new($stdout),
+    proxy: nil,
+    client_cert: nil,
+    client_key: nil,
+    client_key_pass: nil,
+    client_cert_data: nil,
+    client_key_data: nil
+  )
     @logger = logger
-    @connection = Excon.new(url, headers: {
-      "Content-Type" => CONTENT_TYPE,
-    })
+    headers = {
+      "Content-Type" => CONTENT_TYPE
+    }
+    headers[:proxy] = proxy if proxy&.present?
+    @connection = Excon.new(
+      url,
+      headers: headers,
+      client_cert: client_cert,
+      client_key: client_key,
+      client_key_pass: client_key_pass,
+      client_cert_data: client_cert_data,
+      client_key_data: client_key_data
+    )
   end
   def fetch(id)

data/lib/avro_turf/disk_cache.rb ADDED

@@ -0,0 +1,83 @@
+# A cache for the CachedConfluentSchemaRegistry.
+# Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
+class AvroTurf::DiskCache < AvroTurf::InMemoryCache
+  def initialize(disk_path)
+    super()
+    # load the write-thru cache on startup, if it exists
+    @schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
+    @schemas_by_id = JSON.parse(File.read(@schemas_by_id_path)) if File.exist?(@schemas_by_id_path)
+    @ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
+    @ids_by_schema = JSON.parse(File.read(@ids_by_schema_path)) if File.exist?(@ids_by_schema_path)
+    @schemas_by_subject_version_path = File.join(disk_path, 'schemas_by_subject_version.json')
+    @schemas_by_subject_version = {}
+  end
+  # override
+  # the write-thru cache (json) does not store keys in numeric format
+  # so, convert id to a string for caching purposes
+  def lookup_by_id(id)
+    super(id.to_s)
+  end
+  # override to include write-thru cache after storing result from upstream
+  def store_by_id(id, schema)
+    # must return the value from storing the result (i.e. do not return result from file write)
+    value = super(id.to_s, schema)
+    File.write(@schemas_by_id_path, JSON.pretty_generate(@schemas_by_id))
+    return value
+  end
+  # override to include write-thru cache after storing result from upstream
+  def store_by_schema(subject, schema, id)
+    # must return the value from storing the result (i.e. do not return result from file write)
+    value = super
+    File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
+    return value
+  end
+  # checks instance var (in-memory cache) for schema
+  # checks disk cache if in-memory cache doesn't exist
+  # if file exists but no in-memory cache, read from file and sync in-memory cache
+  # finally, if file doesn't exist return nil
+  def lookup_by_version(subject, version)
+    key = "#{subject}#{version}"
+    schema = @schemas_by_subject_version[key]
+    return schema unless schema.nil?
+    hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
+    if hash
+      @schemas_by_subject_version = hash
+      @schemas_by_subject_version[key]
+    end
+  end
+  # check if file exists and parse json into a hash
+  # if file exists take json and overwrite/insert schema at key
+  # if file doesn't exist create new hash
+  # write the new/updated hash to file
+  # update instance var (in memory-cache) to match
+  def store_by_version(subject, version, schema)
+    key = "#{subject}#{version}"
+    hash = JSON.parse(File.read(@schemas_by_subject_version_path)) if File.exist?(@schemas_by_subject_version_path)
+    hash = if hash
+             hash[key] = schema
+             hash
+           else
+             {key => schema}
+           end
+    write_to_disk_cache(@schemas_by_subject_version_path, hash)
+    @schemas_by_subject_version = hash
+    @schemas_by_subject_version[key]
+  end
+  private def write_to_disk_cache(path, hash)
+    File.write(path, JSON.pretty_generate(hash))
+  end
+end

data/lib/avro_turf/in_memory_cache.rb ADDED

@@ -0,0 +1,38 @@
+# A cache for the CachedConfluentSchemaRegistry.
+# Simply stores the schemas and ids in in-memory hashes.
+class AvroTurf::InMemoryCache
+  def initialize
+    @schemas_by_id = {}
+    @ids_by_schema = {}
+    @schema_by_subject_version = {}
+  end
+  def lookup_by_id(id)
+    @schemas_by_id[id]
+  end
+  def store_by_id(id, schema)
+    @schemas_by_id[id] = schema
+  end
+  def lookup_by_schema(subject, schema)
+    key = subject + schema.to_s
+    @ids_by_schema[key]
+  end
+  def store_by_schema(subject, schema, id)
+    key = subject + schema.to_s
+    @ids_by_schema[key] = id
+  end
+  def lookup_by_version(subject, version)
+    key = "#{subject}#{version}"
+    @schema_by_subject_version[key]
+  end
+  def store_by_version(subject, version, schema)
+    key = "#{subject}#{version}"
+    @schema_by_subject_version[key] = schema
+  end
+end

data/lib/avro_turf/messaging.rb CHANGED

@@ -21,21 +21,53 @@ class AvroTurf
   # 1: https://github.com/confluentinc/schema-registry
   class Messaging
     MAGIC_BYTE = [0].pack("C").freeze
+    DecodedMessage = Struct.new(:schema_id, :writer_schema, :reader_schema, :message)
+    private_constant(:DecodedMessage)
     # Instantiate a new Messaging instance with the given configuration.
     #
-    # registry     - A schema registry object that responds to all methods in the
-    #                AvroTurf::ConfluentSchemaRegistry interface.
-    # registry_url - The String URL of the schema registry that should be used.
-    # schema_store - A schema store object that responds to #find(schema_name, namespace).
-    # schemas_path - The String file system path where local schemas are stored.
-    # namespace    - The String default schema namespace.
-    # logger       - The Logger that should be used to log information (optional).
-    def initialize(registry: nil, registry_url: nil, schema_store: nil, schemas_path: nil, namespace: nil, logger: nil)
+    # registry          - A schema registry object that responds to all methods in the
+    #                     AvroTurf::ConfluentSchemaRegistry interface.
+    # registry_url      - The String URL of the schema registry that should be used.
+    # schema_store      - A schema store object that responds to #find(schema_name, namespace).
+    # schemas_path      - The String file system path where local schemas are stored.
+    # namespace         - The String default schema namespace.
+    # logger            - The Logger that should be used to log information (optional).
+    # proxy             - Forward the request via a proxy (optional).
+    # client_cert       - Name of file containing client certificate (optional).
+    # client_key        - Name of file containing client private key to go with client_cert (optional).
+    # client_key_pass   - Password to go with client_key (optional).
+    # client_cert_data  - In-memory client certificate (optional).
+    # client_key_data   - In-memory client private key to go with client_cert_data (optional).
+    def initialize(
+      registry: nil,
+      registry_url: nil,
+      schema_store: nil,
+      schemas_path: nil,
+      namespace: nil,
+      logger: nil,
+      proxy: nil,
+      client_cert: nil,
+      client_key: nil,
+      client_key_pass: nil,
+      client_cert_data: nil,
+      client_key_data: nil
+    )
       @logger = logger || Logger.new($stderr)
       @namespace = namespace
       @schema_store = schema_store || SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
-      @registry = registry || CachedConfluentSchemaRegistry.new(ConfluentSchemaRegistry.new(registry_url, logger: @logger))
+      @registry = registry || CachedConfluentSchemaRegistry.new(
+        ConfluentSchemaRegistry.new(
+          registry_url,
+          logger: @logger,
+          proxy: proxy,
+          client_cert: client_cert,
+          client_key: client_key,
+          client_key_pass: client_key_pass,
+          client_cert_data: client_cert_data,
+          client_key_data: client_key_data
+        )
+      )
       @schemas_by_id = {}
     end
@@ -46,14 +78,24 @@ class AvroTurf
     # schema_name - The String name of the schema that should be used to encode
     #               the data.
     # namespace   - The namespace of the schema (optional).
+    # subject     - The subject name the schema should be registered under in
+    #               the schema registry (optional).
+    # version     - The integer version of the schema that should be used to encode
+    #               the data. Only used together with subject (optional).
+    # schema_id   - The integer id of the schema that should be used to encode
+    #               the data (optional).
     #
     # Returns the encoded data as a String.
-    def encode(message, schema_name: nil, namespace: @namespace, subject: nil)
-      schema = @schema_store.find(schema_name, namespace)
-      # Schemas are registered under the full name of the top level Avro record
-      # type, or `subject` if it's provided.
-      schema_id = @registry.register(subject || schema.fullname, schema)
+    def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil, schema_id: nil)
+      schema_id, schema = if schema_id
+        fetch_schema_by_id(schema_id)
+      elsif subject && version
+        fetch_schema(subject, version)
+      elsif schema_name
+        register_schema(subject, schema_name, namespace)
+      else
+        raise ArgumentError.new('Neither schema_name nor schema_id nor subject + version provided to determine the schema.')
+      end
       stream = StringIO.new
       writer = Avro::IO::DatumWriter.new(schema)
@@ -69,6 +111,12 @@ class AvroTurf
       writer.write(message, encoder)
       stream.string
+    rescue Excon::Error::NotFound
+      if schema_id
+        raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
+      else
+        raise SchemaNotFoundError.new("Schema with subject: `#{subject}` version: `#{version}` is not found on registry")
+      end
     end
     # Decodes data into the original message.
@@ -80,6 +128,20 @@ class AvroTurf
     #
     # Returns the decoded message.
     def decode(data, schema_name: nil, namespace: @namespace)
+      decode_message(data, schema_name: schema_name, namespace: namespace).message
+    end
+    # Decodes data into the original message.
+    #
+    # data        - A String containing encoded data.
+    # schema_name - The String name of the schema that should be used to decode
+    #               the data. Must match the schema used when encoding (optional).
+    # namespace   - The namespace of the schema (optional).
+    #
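+    # Returns a Struct with the following attributes:
+    #   schema_id     - The integer id of the schema used to encode the message
+    #   writer_schema - The schema the message was written with
+    #   reader_schema - The schema found for schema_name, or nil if none was given
+    #   message       - The decoded message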
+    def decode_message(data, schema_name: nil, namespace: @namespace)
       readers_schema = schema_name && @schema_store.find(schema_name, namespace)
       stream = StringIO.new(data)
       decoder = Avro::IO::BinaryDecoder.new(stream)
@@ -100,7 +162,38 @@ class AvroTurf
       end
       reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
-      reader.read(decoder)
+      message = reader.read(decoder)
+      DecodedMessage.new(schema_id, writers_schema, readers_schema, message)
+    rescue Excon::Error::NotFound
+      raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
+    end
+    private
+    # Providing subject and version to determine the schema,
+    # which skips the auto registration of schema on the schema registry.
+    # Fetch the schema from registry with the provided subject name and version.
+    def fetch_schema(subject, version)
+      schema_data = @registry.subject_version(subject, version)
+      schema_id = schema_data.fetch('id')
+      schema = Avro::Schema.parse(schema_data.fetch('schema'))
+      [schema_id, schema]
+    end
+    # Fetch the schema from registry with the provided schema_id.
+    def fetch_schema_by_id(schema_id)
+      schema_json = @registry.fetch(schema_id)
+      schema = Avro::Schema.parse(schema_json)
+      [schema_id, schema]
+    end
+    # Schemas are registered under the full name of the top level Avro record
+    # type, or `subject` if it's provided.
+    def register_schema(subject, schema_name, namespace)
+      schema = @schema_store.find(schema_name, namespace)
+      schema_id = @registry.register(subject || schema.fullname, schema)
+      [schema_id, schema]
     end
   end
 end