avro_turf 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e58564680b9399ae8df438412385f23bdabd46cee8deafc0dfa1c8b827d7792
4
- data.tar.gz: 1df38f38434777fab06fddec69a8834442a4814e269edbed154fc74153f6b198
3
+ metadata.gz: 01a2bdf42a996a7a89bab1769672adc11888d973e5c0c70df42d07b63e87e2bf
4
+ data.tar.gz: c80b96eace3ae4fdb254f046335e7aaf21939cde9da86b671d7450e3d1d2ceb4
5
5
  SHA512:
6
- metadata.gz: 6e47f299a673911614be989feefb56f2cd48be6a556e240919ad23b13b55928c3cd0837d5d5f43aa4c2f72e1f416465f811de24ae7e344f858dc147dd23be136
7
- data.tar.gz: 187c4f087cf7ed656ef3bfed6bf0593938f57da698b0b99550e476a059e529cf355f33ce5bc1839680c1c0513c730e285a117f33ec9e6947c476f0301ab3c597
6
+ metadata.gz: 978d5323dd68d2a1518dadcaa90c1871b065e184ffd06ca7a9a86ad1402c16a46f557ba5847e0a841a269b38bfdcb0769a66ec961eddd6b40dd4f025c6eb2c9e
7
+ data.tar.gz: c2abf819298e5c925f60e7d5e6536aced127e1bee0ef4a66893ea25c16f1e8ea9905de9e142e3f7e0a5989901e7708a9ce000cd85384ef2a703d6e77eb27ce7d
@@ -0,0 +1,35 @@
1
+ version: 2
2
+ jobs:
3
+ build:
4
+ environment:
5
+ CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
6
+ CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
7
+ docker:
8
+ - image: circleci/ruby:2.6.2
9
+ steps:
10
+ - checkout
11
+ - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
12
+ - restore_cache:
13
+ keys:
14
+ # This branch if available
15
+ - v1-dep-{{ .Branch }}-
16
+ # Default branch if not
17
+ - v1-dep-master-
18
+ # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
19
+ - v1-dep-
20
+ - run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
21
+ # Save dependency cache
22
+ - save_cache:
23
+ key: v1-dep-{{ .Branch }}-{{ epoch }}
24
+ paths:
25
+ - vendor/bundle
26
+ - ~/.bundle
27
+ - run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
28
+ - run:
29
+ command: bundle exec rspec --color --require spec_helper --format RspecJunitFormatter --out $CIRCLE_TEST_REPORTS/rspec/rspec.xml --format progress spec
30
+ - store_test_results:
31
+ path: /tmp/circleci-test-results
32
+ - store_artifacts:
33
+ path: /tmp/circleci-artifacts
34
+ - store_artifacts:
35
+ path: /tmp/circleci-test-results
@@ -1,5 +1,15 @@
1
1
  # avro_turf
2
2
 
3
+ ## Unreleased
4
+
5
+ ## v0.9.0
6
+
7
+ - Compatibility with Avro v1.9.0 (#94)
8
+ - Disable the auto registration of schema (#95)
9
+ - Abstracted caching from CachedConfluentSchemaRegistry (#74)
10
+ - Load avro-patches if installed to silence deprecation errors (#85)
11
+ - Make schema store thread safe (#92)
12
+
3
13
  ## v0.8.1
4
14
 
5
15
  - Allow accessing schema store from outside AvroTurf (#68).
data/README.md CHANGED
@@ -124,6 +124,12 @@ avro = AvroTurf::Messaging.new(registry_url: "http://my-registry:8081/")
124
124
  # time a schema is used.
125
125
  data = avro.encode({ "title" => "hello, world" }, schema_name: "greeting")
126
126
 
127
+ # If you don't want to automatically register new schemas, you can explicitly pass
128
+ # subject and version to specify which schema should be used for encoding.
129
+ # It will fetch that schema from the registry and cache it. Subsequent instances
130
+ # of the same schema version will be served by the cache.
131
+ data = avro.encode({ "title" => "hello, world" }, subject: 'greeting', version: 1)
132
+
127
133
  # When decoding, the schema will be fetched from the registry and cached. Subsequent
128
134
  # instances of the same schema id will be served by the cache.
129
135
  avro.decode(data) #=> { "title" => "hello, world" }
@@ -17,7 +17,7 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
- spec.add_dependency "avro", ">= 1.7.7", "< 1.9"
20
+ spec.add_dependency "avro", ">= 1.7.7", "< 1.10"
21
21
  spec.add_dependency "excon", "~> 0.45"
22
22
 
23
23
  spec.add_development_dependency "bundler", "~> 1.7"
@@ -27,6 +27,7 @@ Gem::Specification.new do |spec|
27
27
  spec.add_development_dependency "webmock"
28
28
  spec.add_development_dependency "sinatra"
29
29
  spec.add_development_dependency "json_spec"
30
+ spec.add_development_dependency "rack-test"
30
31
 
31
32
  spec.post_install_message = %{
32
33
  avro_turf v0.8.0 deprecates the names AvroTurf::SchemaRegistry,
@@ -1,9 +1,18 @@
1
+ begin
2
+ require 'avro-patches'
3
+ rescue LoadError
4
+ false
5
+ end
1
6
  require 'avro_turf/version'
2
7
  require 'avro'
3
8
  require 'json'
4
9
  require 'avro_turf/schema_store'
5
10
  require 'avro_turf/core_ext'
6
- require 'avro_turf/schema_to_avro_patch'
11
+
12
+ # check for something that indicates Avro v1.9.0 or later
13
+ unless defined?(::Avro::LogicalTypes)
14
+ require 'avro_turf/schema_to_avro_patch'
15
+ end
7
16
 
8
17
  class AvroTurf
9
18
  class Error < StandardError; end
@@ -1,16 +1,23 @@
1
1
  require 'avro_turf/confluent_schema_registry'
2
+ require 'avro_turf/in_memory_cache'
3
+ require 'avro_turf/disk_cache'
2
4
 
3
5
  # Caches registrations and lookups to the schema registry in memory.
4
6
  class AvroTurf::CachedConfluentSchemaRegistry
5
7
 
6
- def initialize(upstream)
8
+ # Instantiate a new CachedConfluentSchemaRegistry instance with the given configuration.
9
+ # By default, uses a provided InMemoryCache to prevent repeated calls to the upstream registry.
10
+ #
11
+ # upstream - The upstream schema registry object that fully responds to all methods in the
12
+ # AvroTurf::ConfluentSchemaRegistry interface.
13
+ # cache - Optional user provided Cache object that responds to all methods in the AvroTurf::InMemoryCache interface.
14
+ def initialize(upstream, cache: nil)
7
15
  @upstream = upstream
8
- @schemas_by_id = {}
9
- @ids_by_schema = {}
16
+ @cache = cache || AvroTurf::InMemoryCache.new()
10
17
  end
11
18
 
12
19
  # Delegate the following methods to the upstream
13
- %i(subjects subject_versions subject_version check compatible?
20
+ %i(subjects subject_versions check compatible?
14
21
  global_config update_global_config subject_config update_subject_config).each do |name|
15
22
  define_method(name) do |*args|
16
23
  instance_variable_get(:@upstream).send(name, *args)
@@ -18,10 +25,15 @@ class AvroTurf::CachedConfluentSchemaRegistry
18
25
  end
19
26
 
20
27
  def fetch(id)
21
- @schemas_by_id[id] ||= @upstream.fetch(id)
28
+ @cache.lookup_by_id(id) || @cache.store_by_id(id, @upstream.fetch(id))
22
29
  end
23
30
 
24
31
  def register(subject, schema)
25
- @ids_by_schema[subject + schema.to_s] ||= @upstream.register(subject, schema)
32
+ @cache.lookup_by_schema(subject, schema) || @cache.store_by_schema(subject, schema, @upstream.register(subject, schema))
33
+ end
34
+
35
+ def subject_version(subject, version = 'latest')
36
+ @cache.lookup_by_version(subject, version) ||
37
+ @cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
26
38
  end
27
39
  end
@@ -0,0 +1,38 @@
1
+ # A cache for the CachedConfluentSchemaRegistry.
2
+ # Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
3
+ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
4
+
5
+ def initialize(disk_path)
6
+ super()
7
+
8
+ # load the write-thru cache on startup, if it exists
9
+ @schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
10
+ @schemas_by_id = JSON.parse(File.read(@schemas_by_id_path)) if File.exist?(@schemas_by_id_path)
11
+
12
+ @ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
13
+ @ids_by_schema = JSON.parse(File.read(@ids_by_schema_path)) if File.exist?(@ids_by_schema_path)
14
+ end
15
+
16
+ # override
17
+ # the write-thru cache (json) does not store keys in numeric format
18
+ # so, convert id to a string for caching purposes
19
+ def lookup_by_id(id)
20
+ super(id.to_s)
21
+ end
22
+
23
+ # override to include write-thru cache after storing result from upstream
24
+ def store_by_id(id, schema)
25
+ # must return the value from storing the result (i.e. do not return result from file write)
26
+ value = super(id.to_s, schema)
27
+ File.write(@schemas_by_id_path, JSON.pretty_generate(@schemas_by_id))
28
+ return value
29
+ end
30
+
31
+ # override to include write-thru cache after storing result from upstream
32
+ def store_by_schema(subject, schema, id)
33
+ # must return the value from storing the result (i.e. do not return result from file write)
34
+ value = super
35
+ File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
36
+ return value
37
+ end
38
+ end
@@ -0,0 +1,38 @@
1
+ # A cache for the CachedConfluentSchemaRegistry.
2
+ # Simply stores the schemas and ids in in-memory hashes.
3
+ class AvroTurf::InMemoryCache
4
+
5
+ def initialize
6
+ @schemas_by_id = {}
7
+ @ids_by_schema = {}
8
+ @schema_by_subject_version = {}
9
+ end
10
+
11
+ def lookup_by_id(id)
12
+ @schemas_by_id[id]
13
+ end
14
+
15
+ def store_by_id(id, schema)
16
+ @schemas_by_id[id] = schema
17
+ end
18
+
19
+ def lookup_by_schema(subject, schema)
20
+ key = subject + schema.to_s
21
+ @ids_by_schema[key]
22
+ end
23
+
24
+ def store_by_schema(subject, schema, id)
25
+ key = subject + schema.to_s
26
+ @ids_by_schema[key] = id
27
+ end
28
+
29
+ def lookup_by_version(subject, version)
30
+ key = "#{subject}#{version}"
31
+ @schema_by_subject_version[key]
32
+ end
33
+
34
+ def store_by_version(subject, version, schema)
35
+ key = "#{subject}#{version}"
36
+ @schema_by_subject_version[key] = schema
37
+ end
38
+ end
@@ -46,14 +46,20 @@ class AvroTurf
46
46
  # schema_name - The String name of the schema that should be used to encode
47
47
  # the data.
48
48
  # namespace - The namespace of the schema (optional).
49
+ # subject - The subject name the schema should be registered under in
50
+ # the schema registry (optional).
51
+ # version - The integer version of the schema that should be used to encode
52
+ # the data. Must match the schema used when encoding (optional).
49
53
  #
50
54
  # Returns the encoded data as a String.
51
- def encode(message, schema_name: nil, namespace: @namespace, subject: nil)
52
- schema = @schema_store.find(schema_name, namespace)
53
-
54
- # Schemas are registered under the full name of the top level Avro record
55
- # type, or `subject` if it's provided.
56
- schema_id = @registry.register(subject || schema.fullname, schema)
55
+ def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil)
56
+ schema_id, schema = if subject && version
57
+ fetch_schema(subject, version)
58
+ elsif schema_name
59
+ register_schema(subject, schema_name, namespace)
60
+ else
61
+ raise ArgumentError.new('Neither schema_name nor subject + version provided to determine the schema.')
62
+ end
57
63
 
58
64
  stream = StringIO.new
59
65
  writer = Avro::IO::DatumWriter.new(schema)
@@ -69,6 +75,8 @@ class AvroTurf
69
75
  writer.write(message, encoder)
70
76
 
71
77
  stream.string
78
+ rescue Excon::Error::NotFound
79
+ raise SchemaNotFoundError.new("Schema with subject: `#{subject}` version: `#{version}` is not found on registry")
72
80
  end
73
81
 
74
82
  # Decodes data into the original message.
@@ -101,6 +109,28 @@ class AvroTurf
101
109
 
102
110
  reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
103
111
  reader.read(decoder)
112
+ rescue Excon::Error::NotFound
113
+ raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
114
+ end
115
+
116
+ private
117
+
118
+ # Providing subject and version to determine the schema,
119
+ # which skips the auto registration of schema on the schema registry.
120
+ # Fetch the schema from registry with the provided subject name and version.
121
+ def fetch_schema(subject, version)
122
+ schema_data = @registry.subject_version(subject, version)
123
+ schema_id = schema_data.fetch('id')
124
+ schema = Avro::Schema.parse(schema_data.fetch('schema'))
125
+ [schema_id, schema]
126
+ end
127
+
128
+ # Schemas are registered under the full name of the top level Avro record
129
+ # type, or `subject` if it's provided.
130
+ def register_schema(subject, schema_name, namespace)
131
+ schema = @schema_store.find(schema_name, namespace)
132
+ schema_id = @registry.register(subject || schema.fullname, schema)
133
+ [schema_id, schema]
104
134
  end
105
135
  end
106
136
  end
@@ -3,6 +3,7 @@ class AvroTurf::SchemaStore
3
3
  def initialize(path: nil)
4
4
  @path = path or raise "Please specify a schema path"
5
5
  @schemas = Hash.new
6
+ @mutex = Mutex.new
6
7
  end
7
8
 
8
9
  # Resolves and returns a schema.
@@ -12,9 +13,40 @@ class AvroTurf::SchemaStore
12
13
  # Returns an Avro::Schema.
13
14
  def find(name, namespace = nil)
14
15
  fullname = Avro::Name.make_fullname(name, namespace)
15
-
16
+ # Optimistic non-blocking read from @schemas
17
+ # No need to lock the resource when the schema is already loaded
16
18
  return @schemas[fullname] if @schemas.key?(fullname)
17
19
 
20
+ # Pessimistic blocking write to @schemas
21
+ @mutex.synchronize do
22
+ # Still need to check whether the schema is already loaded
23
+ return @schemas[fullname] if @schemas.key?(fullname)
24
+
25
+ load_schema!(fullname, namespace)
26
+ end
27
+ end
28
+
29
+ # Loads all schema definition files in the `schemas_dir`.
30
+ def load_schemas!
31
+ pattern = [@path, "**", "*.avsc"].join("/")
32
+
33
+ Dir.glob(pattern) do |schema_path|
34
+ # Remove the path prefix.
35
+ schema_path.sub!(/^\/?#{@path}\//, "")
36
+
37
+ # Replace `/` with `.` and chop off the file extension.
38
+ schema_name = File.basename(schema_path.tr("/", "."), ".avsc")
39
+
40
+ # Load and cache the schema.
41
+ find(schema_name)
42
+ end
43
+ end
44
+
45
+ private
46
+
47
+ # Loads single schema
48
+ # This method is not thread-safe; do not call it outside of a mutex-synchronized block
49
+ def load_schema!(fullname, namespace = nil)
18
50
  *namespace, schema_name = fullname.split(".")
19
51
  schema_path = File.join(@path, *namespace, schema_name + ".avsc")
20
52
  schema_json = JSON.parse(File.read(schema_path))
@@ -29,32 +61,15 @@ class AvroTurf::SchemaStore
29
61
  # This is a hack in order to figure out exactly which type was missing. The
30
62
  # Avro gem ought to provide this data directly.
31
63
  if e.to_s =~ /"([\w\.]+)" is not a schema we know about/
32
- find($1)
64
+ load_schema!($1)
33
65
 
34
66
  # Re-resolve the original schema now that the dependency has been resolved.
35
67
  @schemas.delete(fullname)
36
- find(fullname)
68
+ load_schema!(fullname)
37
69
  else
38
70
  raise
39
71
  end
40
72
  rescue Errno::ENOENT, Errno::ENAMETOOLONG
41
73
  raise AvroTurf::SchemaNotFoundError, "could not find Avro schema at `#{schema_path}'"
42
74
  end
43
-
44
- # Loads all schema definition files in the `schemas_dir`.
45
- def load_schemas!
46
- pattern = [@path, "**", "*.avsc"].join("/")
47
-
48
- Dir.glob(pattern) do |schema_path|
49
- # Remove the path prefix.
50
- schema_path.sub!(/^\/?#{@path}\//, "")
51
-
52
- # Replace `/` with `.` and chop off the file extension.
53
- schema_name = File.basename(schema_path.tr("/", "."), ".avsc")
54
-
55
- # Load and cache the schema.
56
- find(schema_name)
57
- end
58
- end
59
-
60
75
  end
@@ -34,10 +34,21 @@ class FakeConfluentSchemaRegistryServer < Sinatra::Base
34
34
  end
35
35
 
36
36
  post "/subjects/:subject/versions" do
37
- SCHEMAS << parse_schema
37
+ schema = parse_schema
38
+ ids_for_subject = SUBJECTS[params[:subject]]
39
+
40
+ schemas_for_subject =
41
+ SCHEMAS.select
42
+ .with_index { |_, i| ids_for_subject.include?(i) }
43
+
44
+ if schemas_for_subject.include?(schema)
45
+ schema_id = SCHEMAS.index(schema)
46
+ else
47
+ SCHEMAS << schema
48
+ schema_id = SCHEMAS.size - 1
49
+ SUBJECTS[params[:subject]] = SUBJECTS[params[:subject]] << schema_id
50
+ end
38
51
 
39
- schema_id = SCHEMAS.size - 1
40
- SUBJECTS[params[:subject]] = SUBJECTS[params[:subject]] << schema_id
41
52
  { id: schema_id }.to_json
42
53
  end
43
54
 
@@ -73,6 +84,7 @@ class FakeConfluentSchemaRegistryServer < Sinatra::Base
73
84
  {
74
85
  name: params[:subject],
75
86
  version: schema_ids.index(schema_id) + 1,
87
+ id: schema_id,
76
88
  schema: schema
77
89
  }.to_json
78
90
  end
@@ -1,3 +1,3 @@
1
1
  class AvroTurf
2
- VERSION = "0.8.1"
2
+ VERSION = "0.9.0"
3
3
  end
@@ -16,8 +16,9 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
16
16
 
17
17
  describe "#fetch" do
18
18
  it "caches the result of fetch" do
19
+ # multiple calls return same result, with only one upstream call
19
20
  allow(upstream).to receive(:fetch).with(id).and_return(schema)
20
- registry.fetch(id)
21
+ expect(registry.fetch(id)).to eq(schema)
21
22
  expect(registry.fetch(id)).to eq(schema)
22
23
  expect(upstream).to have_received(:fetch).exactly(1).times
23
24
  end
@@ -27,13 +28,34 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
27
28
  let(:subject_name) { "a_subject" }
28
29
 
29
30
  it "caches the result of register" do
31
+ # multiple calls return same result, with only one upstream call
30
32
  allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
31
- registry.register(subject_name, schema)
33
+ expect(registry.register(subject_name, schema)).to eq(id)
32
34
  expect(registry.register(subject_name, schema)).to eq(id)
33
35
  expect(upstream).to have_received(:register).exactly(1).times
34
36
  end
35
37
  end
36
38
 
39
+ describe '#subject_version' do
40
+ let(:subject_name) { 'a_subject' }
41
+ let(:version) { 1 }
42
+ let(:schema_with_meta) do
43
+ {
44
+ subject: subject_name,
45
+ id: 1,
46
+ version: 1,
47
+ schema: schema
48
+ }
49
+ end
50
+
51
+ it 'caches the result of subject_version' do
52
+ allow(upstream).to receive(:subject_version).with(subject_name, version).and_return(schema_with_meta)
53
+ registry.subject_version(subject_name, version)
54
+ registry.subject_version(subject_name, version)
55
+ expect(upstream).to have_received(:subject_version).exactly(1).times
56
+ end
57
+ end
58
+
37
59
  it_behaves_like "a confluent schema registry client" do
38
60
  let(:upstream) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
39
61
  let(:registry) { described_class.new(upstream) }
@@ -0,0 +1,112 @@
1
+ require 'webmock/rspec'
2
+ require 'avro_turf/cached_confluent_schema_registry'
3
+ require 'avro_turf/test/fake_confluent_schema_registry_server'
4
+
5
+ describe AvroTurf::CachedConfluentSchemaRegistry do
6
+ let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
7
+ let(:cache) { AvroTurf::DiskCache.new("spec/cache")}
8
+ let(:registry) { described_class.new(upstream, cache: cache) }
9
+ let(:id) { rand(999) }
10
+ let(:schema) do
11
+ {
12
+ type: "record",
13
+ name: "person",
14
+ fields: [{ name: "name", type: "string" }]
15
+ }.to_json
16
+ end
17
+
18
+ let(:city_id) { rand(999) }
19
+ let(:city_schema) do
20
+ {
21
+ type: "record",
22
+ name: "city",
23
+ fields: [{ name: "name", type: "string" }]
24
+ }.to_json
25
+ end
26
+
27
+ before do
28
+ FileUtils.mkdir_p("spec/cache")
29
+ end
30
+
31
+ describe "#fetch" do
32
+ let(:cache_before) do
33
+ {
34
+ "#{id}" => "#{schema}"
35
+ }
36
+ end
37
+ let(:cache_after) do
38
+ {
39
+ "#{id}" => "#{schema}",
40
+ "#{city_id}" => "#{city_schema}"
41
+ }
42
+ end
43
+
44
+ # setup the disk cache to avoid performing the upstream fetch
45
+ before do
46
+ store_cache("schemas_by_id.json", cache_before)
47
+ end
48
+
49
+ it "uses preloaded disk cache" do
50
+ # multiple calls return same result, with zero upstream calls
51
+ allow(upstream).to receive(:fetch).with(id).and_return(schema)
52
+ expect(registry.fetch(id)).to eq(schema)
53
+ expect(registry.fetch(id)).to eq(schema)
54
+ expect(upstream).to have_received(:fetch).exactly(0).times
55
+ expect(load_cache("schemas_by_id.json")).to eq cache_before
56
+ end
57
+
58
+ it "writes thru to disk cache" do
59
+ # multiple calls return same result, with only one upstream call
60
+ allow(upstream).to receive(:fetch).with(city_id).and_return(city_schema)
61
+ expect(registry.fetch(city_id)).to eq(city_schema)
62
+ expect(registry.fetch(city_id)).to eq(city_schema)
63
+ expect(upstream).to have_received(:fetch).exactly(1).times
64
+ expect(load_cache("schemas_by_id.json")).to eq cache_after
65
+ end
66
+ end
67
+
68
+ describe "#register" do
69
+ let(:subject_name) { "a_subject" }
70
+ let(:cache_before) do
71
+ {
72
+ "#{subject_name}#{schema}" => id
73
+ }
74
+ end
75
+
76
+ let(:city_name) { "a_city" }
77
+ let(:cache_after) do
78
+ {
79
+ "#{subject_name}#{schema}" => id,
80
+ "#{city_name}#{city_schema}" => city_id
81
+ }
82
+ end
83
+
84
+ # setup the disk cache to avoid performing the upstream register
85
+ before do
86
+ store_cache("ids_by_schema.json", cache_before)
87
+ end
88
+
89
+ it "uses preloaded disk cache" do
90
+ # multiple calls return same result, with zero upstream calls
91
+ allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
92
+ expect(registry.register(subject_name, schema)).to eq(id)
93
+ expect(registry.register(subject_name, schema)).to eq(id)
94
+ expect(upstream).to have_received(:register).exactly(0).times
95
+ expect(load_cache("ids_by_schema.json")).to eq cache_before
96
+ end
97
+
98
+ it "writes thru to disk cache" do
99
+ # multiple calls return same result, with only one upstream call
100
+ allow(upstream).to receive(:register).with(city_name, city_schema).and_return(city_id)
101
+ expect(registry.register(city_name, city_schema)).to eq(city_id)
102
+ expect(registry.register(city_name, city_schema)).to eq(city_id)
103
+ expect(upstream).to have_received(:register).exactly(1).times
104
+ expect(load_cache("ids_by_schema.json")).to eq cache_after
105
+ end
106
+ end
107
+
108
+ it_behaves_like "a confluent schema registry client" do
109
+ let(:upstream) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
110
+ let(:registry) { described_class.new(upstream) }
111
+ end
112
+ end
@@ -15,18 +15,8 @@ describe AvroTurf::Messaging do
15
15
  }
16
16
 
17
17
  let(:message) { { "full_name" => "John Doe" } }
18
-
19
- before do
20
- FileUtils.mkdir_p("spec/schemas")
21
- end
22
-
23
- before do
24
- stub_request(:any, /^#{registry_url}/).to_rack(FakeConfluentSchemaRegistryServer)
25
- FakeConfluentSchemaRegistryServer.clear
26
- end
27
-
28
- before do
29
- define_schema "person.avsc", <<-AVSC
18
+ let(:schema_json) do
19
+ <<-AVSC
30
20
  {
31
21
  "name": "person",
32
22
  "type": "record",
@@ -40,7 +30,20 @@ describe AvroTurf::Messaging do
40
30
  AVSC
41
31
  end
42
32
 
43
- shared_examples_for "encoding and decoding" do
33
+ before do
34
+ FileUtils.mkdir_p("spec/schemas")
35
+ end
36
+
37
+ before do
38
+ stub_request(:any, /^#{registry_url}/).to_rack(FakeConfluentSchemaRegistryServer)
39
+ FakeConfluentSchemaRegistryServer.clear
40
+ end
41
+
42
+ before do
43
+ define_schema "person.avsc", schema_json
44
+ end
45
+
46
+ shared_examples_for "encoding and decoding with the schema from schema store" do
44
47
  it "encodes and decodes messages" do
45
48
  data = avro.encode(message, schema_name: "person")
46
49
  expect(avro.decode(data)).to eq message
@@ -60,7 +63,39 @@ describe AvroTurf::Messaging do
60
63
  end
61
64
  end
62
65
 
63
- it_behaves_like "encoding and decoding"
66
+ shared_examples_for 'encoding and decoding with the schema from registry' do
67
+ before do
68
+ registry = AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger)
69
+ registry.register('person', Avro::Schema.parse(schema_json))
70
+ registry.register('people', Avro::Schema.parse(schema_json))
71
+ end
72
+
73
+ it 'encodes and decodes messages' do
74
+ data = avro.encode(message, subject: 'person', version: 1)
75
+ expect(avro.decode(data)).to eq message
76
+ end
77
+
78
+ it "allows specifying a reader's schema by subject and version" do
79
+ data = avro.encode(message, subject: 'person', version: 1)
80
+ expect(avro.decode(data, schema_name: 'person')).to eq message
81
+ end
82
+
83
+ it 'raises AvroTurf::SchemaNotFoundError when the schema does not exist on registry' do
84
+ expect { avro.encode(message, subject: 'missing', version: 1) }.to raise_error(AvroTurf::SchemaNotFoundError)
85
+ end
86
+
87
+ it 'caches parsed schemas for decoding' do
88
+ data = avro.encode(message, subject: 'person', version: 1)
89
+ avro.decode(data)
90
+ allow(Avro::Schema).to receive(:parse).and_call_original
91
+ expect(avro.decode(data)).to eq message
92
+ expect(Avro::Schema).not_to have_received(:parse)
93
+ end
94
+ end
95
+
96
+ it_behaves_like "encoding and decoding with the schema from schema store"
97
+
98
+ it_behaves_like 'encoding and decoding with the schema from registry'
64
99
 
65
100
  context "with a provided registry" do
66
101
  let(:registry) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
@@ -73,7 +108,9 @@ describe AvroTurf::Messaging do
73
108
  )
74
109
  end
75
110
 
76
- it_behaves_like "encoding and decoding"
111
+ it_behaves_like "encoding and decoding with the schema from schema store"
112
+
113
+ it_behaves_like 'encoding and decoding with the schema from registry'
77
114
 
78
115
  it "uses the provided registry" do
79
116
  allow(registry).to receive(:register).and_call_original
@@ -101,7 +138,7 @@ describe AvroTurf::Messaging do
101
138
  )
102
139
  end
103
140
 
104
- it_behaves_like "encoding and decoding"
141
+ it_behaves_like "encoding and decoding with the schema from schema store"
105
142
 
106
143
  it "uses the provided schema store" do
107
144
  allow(schema_store).to receive(:find).and_call_original
@@ -197,6 +197,42 @@ describe AvroTurf::SchemaStore do
197
197
  schema = store.find("person")
198
198
  expect(schema.fullname).to eq "person"
199
199
  end
200
+
201
+ it "is thread safe" do
202
+ define_schema "address.avsc", <<-AVSC
203
+ {
204
+ "type": "record",
205
+ "name": "address",
206
+ "fields": []
207
+ }
208
+ AVSC
209
+
210
+ # Set a Thread breakpoint right in the core place of race condition
211
+ expect(Avro::Name)
212
+ .to receive(:add_name)
213
+ .and_wrap_original { |m, *args|
214
+ Thread.stop
215
+ m.call(*args)
216
+ }
217
+
218
+ # Run two concurrent threads that will both trigger loading of the same schema
219
+ threads = 2.times.map { Thread.new { store.find("address") } }
220
+ # Wait for the moment when both threads will reach the breakpoint
221
+ sleep 0.001 until threads.all?(&:stop?)
222
+
223
+ expect {
224
+ # Resume the threads evaluation, one after one
225
+ threads.each do |thread|
226
+ next unless thread.status == 'sleep'
227
+
228
+ thread.run
229
+ sleep 0.001 until thread.stop?
230
+ end
231
+
232
+ # Ensure that threads are finished
233
+ threads.each(&:join)
234
+ }.to_not raise_error
235
+ end
200
236
  end
201
237
 
202
238
  describe "#load_schemas!" do
@@ -12,6 +12,14 @@ module Helpers
12
12
  f.write(content)
13
13
  end
14
14
  end
15
+
16
+ def store_cache(path, hash)
17
+ File.write(File.join("spec/cache", path), JSON.generate(hash))
18
+ end
19
+
20
+ def load_cache(path)
21
+ JSON.parse(File.read(File.join("spec/cache", path)))
22
+ end
15
23
  end
16
24
 
17
25
  RSpec.configure do |config|
@@ -88,16 +88,18 @@ shared_examples_for "a confluent schema registry client" do
88
88
  end
89
89
 
90
90
  describe "#subject_version" do
91
- before do
92
- 2.times do |n|
93
- registry.register(subject_name,
94
- { type: :record, name: "r#{n}", fields: [] }.to_json)
95
- end
91
+ let!(:schema_id1) do
92
+ registry.register(subject_name, { type: :record, name: "r0", fields: [] }.to_json)
93
+ end
94
+ let!(:schema_id2) do
95
+ registry.register(subject_name, { type: :record, name: "r1", fields: [] }.to_json)
96
96
  end
97
+
97
98
  let(:expected) do
98
99
  {
99
100
  name: subject_name,
100
101
  version: 1,
102
+ id: schema_id1,
101
103
  schema: { type: :record, name: "r0", fields: [] }.to_json
102
104
  }.to_json
103
105
  end
@@ -112,6 +114,7 @@ shared_examples_for "a confluent schema registry client" do
112
114
  {
113
115
  name: subject_name,
114
116
  version: 2,
117
+ id: schema_id2,
115
118
  schema: { type: :record, name: "r1", fields: [] }.to_json
116
119
  }.to_json
117
120
  end
@@ -0,0 +1,40 @@
1
+ require 'rack/test'
2
+ require 'avro_turf/test/fake_confluent_schema_registry_server'
3
+
4
+ describe FakeConfluentSchemaRegistryServer do
5
+ include Rack::Test::Methods
6
+
7
+ def app; described_class; end
8
+
9
+ let(:schema) do
10
+ {
11
+ type: "record",
12
+ name: "person",
13
+ fields: [
14
+ { name: "name", type: "string" }
15
+ ]
16
+ }.to_json
17
+ end
18
+
19
+ describe 'POST /subjects/:subject/versions' do
20
+ it 'returns the same schema ID when invoked with same schema and same subject' do
21
+ post '/subjects/person/versions', { schema: schema }.to_json, 'CONTENT_TYPE' => 'application/vnd.schemaregistry+json'
22
+
23
+ expected_id = JSON.parse(last_response.body).fetch('id')
24
+
25
+ post '/subjects/person/versions', { schema: schema }.to_json, 'CONTENT_TYPE' => 'application/vnd.schemaregistry+json'
26
+
27
+ expect(JSON.parse(last_response.body).fetch('id')).to eq expected_id
28
+ end
29
+
30
+ it 'returns a different schema ID when invoked with same schema and different subject' do
31
+ post '/subjects/person/versions', { schema: schema }.to_json, 'CONTENT_TYPE' => 'application/vnd.schemaregistry+json'
32
+
33
+ original_id = JSON.parse(last_response.body).fetch('id')
34
+
35
+ post '/subjects/happy-person/versions', { schema: schema }.to_json, 'CONTENT_TYPE' => 'application/vnd.schemaregistry+json'
36
+
37
+ expect(JSON.parse(last_response.body).fetch('id')).not_to eq original_id
38
+ end
39
+ end
40
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro_turf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-15 00:00:00.000000000 Z
11
+ date: 2019-07-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: avro
@@ -19,7 +19,7 @@ dependencies:
19
19
  version: 1.7.7
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: '1.9'
22
+ version: '1.10'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,7 +29,7 @@ dependencies:
29
29
  version: 1.7.7
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: '1.9'
32
+ version: '1.10'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: excon
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -142,6 +142,20 @@ dependencies:
142
142
  - - ">="
143
143
  - !ruby/object:Gem::Version
144
144
  version: '0'
145
+ - !ruby/object:Gem::Dependency
146
+ name: rack-test
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - ">="
150
+ - !ruby/object:Gem::Version
151
+ version: '0'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: '0'
145
159
  description:
146
160
  email:
147
161
  - dasch@zendesk.com
@@ -149,6 +163,7 @@ executables: []
149
163
  extensions: []
150
164
  extra_rdoc_files: []
151
165
  files:
166
+ - ".circleci/config.yml"
152
167
  - ".gitignore"
153
168
  - ".rspec"
154
169
  - CHANGELOG.md
@@ -157,7 +172,6 @@ files:
157
172
  - README.md
158
173
  - Rakefile
159
174
  - avro_turf.gemspec
160
- - circle.yml
161
175
  - lib/avro_turf.rb
162
176
  - lib/avro_turf/cached_confluent_schema_registry.rb
163
177
  - lib/avro_turf/cached_schema_registry.rb
@@ -173,6 +187,8 @@ files:
173
187
  - lib/avro_turf/core_ext/symbol.rb
174
188
  - lib/avro_turf/core_ext/time.rb
175
189
  - lib/avro_turf/core_ext/true_class.rb
190
+ - lib/avro_turf/disk_cache.rb
191
+ - lib/avro_turf/in_memory_cache.rb
176
192
  - lib/avro_turf/messaging.rb
177
193
  - lib/avro_turf/mutable_schema_store.rb
178
194
  - lib/avro_turf/schema_registry.rb
@@ -198,11 +214,13 @@ files:
198
214
  - spec/core_ext/symbol_spec.rb
199
215
  - spec/core_ext/time_spec.rb
200
216
  - spec/core_ext/true_class_spec.rb
217
+ - spec/disk_cached_confluent_schema_registry_spec.rb
201
218
  - spec/messaging_spec.rb
202
219
  - spec/schema_store_spec.rb
203
220
  - spec/schema_to_avro_patch_spec.rb
204
221
  - spec/spec_helper.rb
205
222
  - spec/support/confluent_schema_registry_context.rb
223
+ - spec/test/fake_confluent_schema_registry_server_spec.rb
206
224
  homepage: https://github.com/dasch/avro_turf
207
225
  licenses:
208
226
  - MIT
@@ -230,8 +248,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
230
248
  - !ruby/object:Gem::Version
231
249
  version: '0'
232
250
  requirements: []
233
- rubyforge_project:
234
- rubygems_version: 2.7.6
251
+ rubygems_version: 3.0.3
235
252
  signing_key:
236
253
  specification_version: 4
237
254
  summary: A library that makes it easier to use the Avro serialization format from
@@ -250,8 +267,10 @@ test_files:
250
267
  - spec/core_ext/symbol_spec.rb
251
268
  - spec/core_ext/time_spec.rb
252
269
  - spec/core_ext/true_class_spec.rb
270
+ - spec/disk_cached_confluent_schema_registry_spec.rb
253
271
  - spec/messaging_spec.rb
254
272
  - spec/schema_store_spec.rb
255
273
  - spec/schema_to_avro_patch_spec.rb
256
274
  - spec/spec_helper.rb
257
275
  - spec/support/confluent_schema_registry_context.rb
276
+ - spec/test/fake_confluent_schema_registry_server_spec.rb
data/circle.yml DELETED
@@ -1,4 +0,0 @@
1
- machine:
2
- ruby:
3
- version: 2.2.0
4
- version: 2.0.0