avro_turf 0.8.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e58564680b9399ae8df438412385f23bdabd46cee8deafc0dfa1c8b827d7792
4
- data.tar.gz: 1df38f38434777fab06fddec69a8834442a4814e269edbed154fc74153f6b198
3
+ metadata.gz: 01a2bdf42a996a7a89bab1769672adc11888d973e5c0c70df42d07b63e87e2bf
4
+ data.tar.gz: c80b96eace3ae4fdb254f046335e7aaf21939cde9da86b671d7450e3d1d2ceb4
5
5
  SHA512:
6
- metadata.gz: 6e47f299a673911614be989feefb56f2cd48be6a556e240919ad23b13b55928c3cd0837d5d5f43aa4c2f72e1f416465f811de24ae7e344f858dc147dd23be136
7
- data.tar.gz: 187c4f087cf7ed656ef3bfed6bf0593938f57da698b0b99550e476a059e529cf355f33ce5bc1839680c1c0513c730e285a117f33ec9e6947c476f0301ab3c597
6
+ metadata.gz: 978d5323dd68d2a1518dadcaa90c1871b065e184ffd06ca7a9a86ad1402c16a46f557ba5847e0a841a269b38bfdcb0769a66ec961eddd6b40dd4f025c6eb2c9e
7
+ data.tar.gz: c2abf819298e5c925f60e7d5e6536aced127e1bee0ef4a66893ea25c16f1e8ea9905de9e142e3f7e0a5989901e7708a9ce000cd85384ef2a703d6e77eb27ce7d
@@ -0,0 +1,35 @@
1
+ version: 2
2
+ jobs:
3
+ build:
4
+ environment:
5
+ CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
6
+ CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
7
+ docker:
8
+ - image: circleci/ruby:2.6.2
9
+ steps:
10
+ - checkout
11
+ - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
12
+ - restore_cache:
13
+ keys:
14
+ # This branch if available
15
+ - v1-dep-{{ .Branch }}-
16
+ # Default branch if not
17
+ - v1-dep-master-
18
+ # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
19
+ - v1-dep-
20
+ - run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
21
+ # Save dependency cache
22
+ - save_cache:
23
+ key: v1-dep-{{ .Branch }}-{{ epoch }}
24
+ paths:
25
+ - vendor/bundle
26
+ - ~/.bundle
27
+ - run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
28
+ - run:
29
+ command: bundle exec rspec --color --require spec_helper --format RspecJunitFormatter --out $CIRCLE_TEST_REPORTS/rspec/rspec.xml --format progress spec
30
+ - store_test_results:
31
+ path: /tmp/circleci-test-results
32
+ - store_artifacts:
33
+ path: /tmp/circleci-artifacts
34
+ - store_artifacts:
35
+ path: /tmp/circleci-test-results
@@ -1,5 +1,15 @@
1
1
  # avro_turf
2
2
 
3
+ ## Unreleased
4
+
5
+ ## v0.9.0
6
+
7
+ - Compatibility with Avro v1.9.0 (#94)
8
+ - Disable the auto registration of schema (#95)
9
+ - abstracted caching from CachedConfluentSchemaRegistry (#74)
10
+ - Load avro-patches if installed to silence deprecation errors (#85)
11
+ - Make the schema store thread safe (#92)
12
+
3
13
  ## v0.8.1
4
14
 
5
15
  - Allow accessing schema store from outside AvroTurf (#68).
data/README.md CHANGED
@@ -124,6 +124,12 @@ avro = AvroTurf::Messaging.new(registry_url: "http://my-registry:8081/")
124
124
  # time a schema is used.
125
125
  data = avro.encode({ "title" => "hello, world" }, schema_name: "greeting")
126
126
 
127
+ # If you don't want to automatically register new schemas, you can explicitly pass
128
+ # subject and version to specify which schema should be used for encoding.
129
+ # It will fetch that schema from the registry and cache it. Subsequent instances
130
+ # of the same schema version will be served by the cache.
131
+ data = avro.encode({ "title" => "hello, world" }, subject: 'greeting', version: 1)
132
+
127
133
  # When decoding, the schema will be fetched from the registry and cached. Subsequent
128
134
  # instances of the same schema id will be served by the cache.
129
135
  avro.decode(data) #=> { "title" => "hello, world" }
@@ -17,7 +17,7 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
- spec.add_dependency "avro", ">= 1.7.7", "< 1.9"
20
+ spec.add_dependency "avro", ">= 1.7.7", "< 1.10"
21
21
  spec.add_dependency "excon", "~> 0.45"
22
22
 
23
23
  spec.add_development_dependency "bundler", "~> 1.7"
@@ -27,6 +27,7 @@ Gem::Specification.new do |spec|
27
27
  spec.add_development_dependency "webmock"
28
28
  spec.add_development_dependency "sinatra"
29
29
  spec.add_development_dependency "json_spec"
30
+ spec.add_development_dependency "rack-test"
30
31
 
31
32
  spec.post_install_message = %{
32
33
  avro_turf v0.8.0 deprecates the names AvroTurf::SchemaRegistry,
@@ -1,9 +1,18 @@
1
+ begin
2
+ require 'avro-patches'
3
+ rescue LoadError
4
+ false
5
+ end
1
6
  require 'avro_turf/version'
2
7
  require 'avro'
3
8
  require 'json'
4
9
  require 'avro_turf/schema_store'
5
10
  require 'avro_turf/core_ext'
6
- require 'avro_turf/schema_to_avro_patch'
11
+
12
+ # check for something that indicates Avro v1.9.0 or later
13
+ unless defined?(::Avro::LogicalTypes)
14
+ require 'avro_turf/schema_to_avro_patch'
15
+ end
7
16
 
8
17
  class AvroTurf
9
18
  class Error < StandardError; end
@@ -1,16 +1,23 @@
1
1
  require 'avro_turf/confluent_schema_registry'
2
+ require 'avro_turf/in_memory_cache'
3
+ require 'avro_turf/disk_cache'
2
4
 
3
5
  # Caches registrations and lookups to the schema registry in memory.
4
6
  class AvroTurf::CachedConfluentSchemaRegistry
5
7
 
6
- def initialize(upstream)
8
+ # Instantiate a new CachedConfluentSchemaRegistry instance with the given configuration.
9
+ # By default, uses a provided InMemoryCache to prevent repeated calls to the upstream registry.
10
+ #
11
+ # upstream - The upstream schema registry object that fully responds to all methods in the
12
+ # AvroTurf::ConfluentSchemaRegistry interface.
13
+ # cache - Optional user provided Cache object that responds to all methods in the AvroTurf::InMemoryCache interface.
14
+ def initialize(upstream, cache: nil)
7
15
  @upstream = upstream
8
- @schemas_by_id = {}
9
- @ids_by_schema = {}
16
+ @cache = cache || AvroTurf::InMemoryCache.new()
10
17
  end
11
18
 
12
19
  # Delegate the following methods to the upstream
13
- %i(subjects subject_versions subject_version check compatible?
20
+ %i(subjects subject_versions check compatible?
14
21
  global_config update_global_config subject_config update_subject_config).each do |name|
15
22
  define_method(name) do |*args|
16
23
  instance_variable_get(:@upstream).send(name, *args)
@@ -18,10 +25,15 @@ class AvroTurf::CachedConfluentSchemaRegistry
18
25
  end
19
26
 
20
27
  def fetch(id)
21
- @schemas_by_id[id] ||= @upstream.fetch(id)
28
+ @cache.lookup_by_id(id) || @cache.store_by_id(id, @upstream.fetch(id))
22
29
  end
23
30
 
24
31
  def register(subject, schema)
25
- @ids_by_schema[subject + schema.to_s] ||= @upstream.register(subject, schema)
32
+ @cache.lookup_by_schema(subject, schema) || @cache.store_by_schema(subject, schema, @upstream.register(subject, schema))
33
+ end
34
+
35
+ def subject_version(subject, version = 'latest')
36
+ @cache.lookup_by_version(subject, version) ||
37
+ @cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
26
38
  end
27
39
  end
@@ -0,0 +1,38 @@
1
+ # A cache for the CachedConfluentSchemaRegistry.
2
+ # Extends the InMemoryCache to provide a write-thru to disk for persistent cache.
3
+ class AvroTurf::DiskCache < AvroTurf::InMemoryCache
4
+
5
+ def initialize(disk_path)
6
+ super()
7
+
8
+ # load the write-thru cache on startup, if it exists
9
+ @schemas_by_id_path = File.join(disk_path, 'schemas_by_id.json')
10
+ @schemas_by_id = JSON.parse(File.read(@schemas_by_id_path)) if File.exist?(@schemas_by_id_path)
11
+
12
+ @ids_by_schema_path = File.join(disk_path, 'ids_by_schema.json')
13
+ @ids_by_schema = JSON.parse(File.read(@ids_by_schema_path)) if File.exist?(@ids_by_schema_path)
14
+ end
15
+
16
+ # override
17
+ # the write-thru cache (json) does not store keys in numeric format
18
+ # so, convert id to a string for caching purposes
19
+ def lookup_by_id(id)
20
+ super(id.to_s)
21
+ end
22
+
23
+ # override to include write-thru cache after storing result from upstream
24
+ def store_by_id(id, schema)
25
+ # must return the value from storing the result (i.e. do not return result from file write)
26
+ value = super(id.to_s, schema)
27
+ File.write(@schemas_by_id_path, JSON.pretty_generate(@schemas_by_id))
28
+ return value
29
+ end
30
+
31
+ # override to include write-thru cache after storing result from upstream
32
+ def store_by_schema(subject, schema, id)
33
+ # must return the value from storing the result (i.e. do not return result from file write)
34
+ value = super
35
+ File.write(@ids_by_schema_path, JSON.pretty_generate(@ids_by_schema))
36
+ return value
37
+ end
38
+ end
@@ -0,0 +1,38 @@
1
+ # A cache for the CachedConfluentSchemaRegistry.
2
+ # Simply stores the schemas and ids in in-memory hashes.
3
+ class AvroTurf::InMemoryCache
4
+
5
+ def initialize
6
+ @schemas_by_id = {}
7
+ @ids_by_schema = {}
8
+ @schema_by_subject_version = {}
9
+ end
10
+
11
+ def lookup_by_id(id)
12
+ @schemas_by_id[id]
13
+ end
14
+
15
+ def store_by_id(id, schema)
16
+ @schemas_by_id[id] = schema
17
+ end
18
+
19
+ def lookup_by_schema(subject, schema)
20
+ key = subject + schema.to_s
21
+ @ids_by_schema[key]
22
+ end
23
+
24
+ def store_by_schema(subject, schema, id)
25
+ key = subject + schema.to_s
26
+ @ids_by_schema[key] = id
27
+ end
28
+
29
+ def lookup_by_version(subject, version)
30
+ key = "#{subject}#{version}"
31
+ @schema_by_subject_version[key]
32
+ end
33
+
34
+ def store_by_version(subject, version, schema)
35
+ key = "#{subject}#{version}"
36
+ @schema_by_subject_version[key] = schema
37
+ end
38
+ end
@@ -46,14 +46,20 @@ class AvroTurf
46
46
  # schema_name - The String name of the schema that should be used to encode
47
47
  # the data.
48
48
  # namespace - The namespace of the schema (optional).
49
+ # subject - The subject name the schema should be registered under in
50
+ # the schema registry (optional).
51
+ # version - The integer version of the schema that should be used to encode
52
+ # the data. Must match the schema used when encoding (optional).
49
53
  #
50
54
  # Returns the encoded data as a String.
51
- def encode(message, schema_name: nil, namespace: @namespace, subject: nil)
52
- schema = @schema_store.find(schema_name, namespace)
53
-
54
- # Schemas are registered under the full name of the top level Avro record
55
- # type, or `subject` if it's provided.
56
- schema_id = @registry.register(subject || schema.fullname, schema)
55
+ def encode(message, schema_name: nil, namespace: @namespace, subject: nil, version: nil)
56
+ schema_id, schema = if subject && version
57
+ fetch_schema(subject, version)
58
+ elsif schema_name
59
+ register_schema(subject, schema_name, namespace)
60
+ else
61
+ raise ArgumentError.new('Neither schema_name nor subject + version provided to determine the schema.')
62
+ end
57
63
 
58
64
  stream = StringIO.new
59
65
  writer = Avro::IO::DatumWriter.new(schema)
@@ -69,6 +75,8 @@ class AvroTurf
69
75
  writer.write(message, encoder)
70
76
 
71
77
  stream.string
78
+ rescue Excon::Error::NotFound
79
+ raise SchemaNotFoundError.new("Schema with subject: `#{subject}` version: `#{version}` is not found on registry")
72
80
  end
73
81
 
74
82
  # Decodes data into the original message.
@@ -101,6 +109,28 @@ class AvroTurf
101
109
 
102
110
  reader = Avro::IO::DatumReader.new(writers_schema, readers_schema)
103
111
  reader.read(decoder)
112
+ rescue Excon::Error::NotFound
113
+ raise SchemaNotFoundError.new("Schema with id: #{schema_id} is not found on registry")
114
+ end
115
+
116
+ private
117
+
118
+ # Providing subject and version to determine the schema,
119
+ # which skips the auto registration of schema on the schema registry.
120
+ # Fetch the schema from registry with the provided subject name and version.
121
+ def fetch_schema(subject, version)
122
+ schema_data = @registry.subject_version(subject, version)
123
+ schema_id = schema_data.fetch('id')
124
+ schema = Avro::Schema.parse(schema_data.fetch('schema'))
125
+ [schema_id, schema]
126
+ end
127
+
128
+ # Schemas are registered under the full name of the top level Avro record
129
+ # type, or `subject` if it's provided.
130
+ def register_schema(subject, schema_name, namespace)
131
+ schema = @schema_store.find(schema_name, namespace)
132
+ schema_id = @registry.register(subject || schema.fullname, schema)
133
+ [schema_id, schema]
104
134
  end
105
135
  end
106
136
  end
@@ -3,6 +3,7 @@ class AvroTurf::SchemaStore
3
3
  def initialize(path: nil)
4
4
  @path = path or raise "Please specify a schema path"
5
5
  @schemas = Hash.new
6
+ @mutex = Mutex.new
6
7
  end
7
8
 
8
9
  # Resolves and returns a schema.
@@ -12,9 +13,40 @@ class AvroTurf::SchemaStore
12
13
  # Returns an Avro::Schema.
13
14
  def find(name, namespace = nil)
14
15
  fullname = Avro::Name.make_fullname(name, namespace)
15
-
16
+ # Optimistic non-blocking read from @schemas
17
+ # There is no sense in locking the resource when the schema is already loaded
16
18
  return @schemas[fullname] if @schemas.key?(fullname)
17
19
 
20
+ # Pessimistic blocking write to @schemas
21
+ @mutex.synchronize do
22
+ # Still need to check whether the schema is already loaded
23
+ return @schemas[fullname] if @schemas.key?(fullname)
24
+
25
+ load_schema!(fullname, namespace)
26
+ end
27
+ end
28
+
29
+ # Loads all schema definition files in the `schemas_dir`.
30
+ def load_schemas!
31
+ pattern = [@path, "**", "*.avsc"].join("/")
32
+
33
+ Dir.glob(pattern) do |schema_path|
34
+ # Remove the path prefix.
35
+ schema_path.sub!(/^\/?#{@path}\//, "")
36
+
37
+ # Replace `/` with `.` and chop off the file extension.
38
+ schema_name = File.basename(schema_path.tr("/", "."), ".avsc")
39
+
40
+ # Load and cache the schema.
41
+ find(schema_name)
42
+ end
43
+ end
44
+
45
+ private
46
+
47
+ # Loads single schema
48
+ # This method is not thread-safe; do not call it outside of a mutex synchronization routine
49
+ def load_schema!(fullname, namespace = nil)
18
50
  *namespace, schema_name = fullname.split(".")
19
51
  schema_path = File.join(@path, *namespace, schema_name + ".avsc")
20
52
  schema_json = JSON.parse(File.read(schema_path))
@@ -29,32 +61,15 @@ class AvroTurf::SchemaStore
29
61
  # This is a hack in order to figure out exactly which type was missing. The
30
62
  # Avro gem ought to provide this data directly.
31
63
  if e.to_s =~ /"([\w\.]+)" is not a schema we know about/
32
- find($1)
64
+ load_schema!($1)
33
65
 
34
66
  # Re-resolve the original schema now that the dependency has been resolved.
35
67
  @schemas.delete(fullname)
36
- find(fullname)
68
+ load_schema!(fullname)
37
69
  else
38
70
  raise
39
71
  end
40
72
  rescue Errno::ENOENT, Errno::ENAMETOOLONG
41
73
  raise AvroTurf::SchemaNotFoundError, "could not find Avro schema at `#{schema_path}'"
42
74
  end
43
-
44
- # Loads all schema definition files in the `schemas_dir`.
45
- def load_schemas!
46
- pattern = [@path, "**", "*.avsc"].join("/")
47
-
48
- Dir.glob(pattern) do |schema_path|
49
- # Remove the path prefix.
50
- schema_path.sub!(/^\/?#{@path}\//, "")
51
-
52
- # Replace `/` with `.` and chop off the file extension.
53
- schema_name = File.basename(schema_path.tr("/", "."), ".avsc")
54
-
55
- # Load and cache the schema.
56
- find(schema_name)
57
- end
58
- end
59
-
60
75
  end
@@ -34,10 +34,21 @@ class FakeConfluentSchemaRegistryServer < Sinatra::Base
34
34
  end
35
35
 
36
36
  post "/subjects/:subject/versions" do
37
- SCHEMAS << parse_schema
37
+ schema = parse_schema
38
+ ids_for_subject = SUBJECTS[params[:subject]]
39
+
40
+ schemas_for_subject =
41
+ SCHEMAS.select
42
+ .with_index { |_, i| ids_for_subject.include?(i) }
43
+
44
+ if schemas_for_subject.include?(schema)
45
+ schema_id = SCHEMAS.index(schema)
46
+ else
47
+ SCHEMAS << schema
48
+ schema_id = SCHEMAS.size - 1
49
+ SUBJECTS[params[:subject]] = SUBJECTS[params[:subject]] << schema_id
50
+ end
38
51
 
39
- schema_id = SCHEMAS.size - 1
40
- SUBJECTS[params[:subject]] = SUBJECTS[params[:subject]] << schema_id
41
52
  { id: schema_id }.to_json
42
53
  end
43
54
 
@@ -73,6 +84,7 @@ class FakeConfluentSchemaRegistryServer < Sinatra::Base
73
84
  {
74
85
  name: params[:subject],
75
86
  version: schema_ids.index(schema_id) + 1,
87
+ id: schema_id,
76
88
  schema: schema
77
89
  }.to_json
78
90
  end
@@ -1,3 +1,3 @@
1
1
  class AvroTurf
2
- VERSION = "0.8.1"
2
+ VERSION = "0.9.0"
3
3
  end
@@ -16,8 +16,9 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
16
16
 
17
17
  describe "#fetch" do
18
18
  it "caches the result of fetch" do
19
+ # multiple calls return same result, with only one upstream call
19
20
  allow(upstream).to receive(:fetch).with(id).and_return(schema)
20
- registry.fetch(id)
21
+ expect(registry.fetch(id)).to eq(schema)
21
22
  expect(registry.fetch(id)).to eq(schema)
22
23
  expect(upstream).to have_received(:fetch).exactly(1).times
23
24
  end
@@ -27,13 +28,34 @@ describe AvroTurf::CachedConfluentSchemaRegistry do
27
28
  let(:subject_name) { "a_subject" }
28
29
 
29
30
  it "caches the result of register" do
31
+ # multiple calls return same result, with only one upstream call
30
32
  allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
31
- registry.register(subject_name, schema)
33
+ expect(registry.register(subject_name, schema)).to eq(id)
32
34
  expect(registry.register(subject_name, schema)).to eq(id)
33
35
  expect(upstream).to have_received(:register).exactly(1).times
34
36
  end
35
37
  end
36
38
 
39
+ describe '#subject_version' do
40
+ let(:subject_name) { 'a_subject' }
41
+ let(:version) { 1 }
42
+ let(:schema_with_meta) do
43
+ {
44
+ subject: subject_name,
45
+ id: 1,
46
+ version: 1,
47
+ schema: schema
48
+ }
49
+ end
50
+
51
+ it 'caches the result of subject_version' do
52
+ allow(upstream).to receive(:subject_version).with(subject_name, version).and_return(schema_with_meta)
53
+ registry.subject_version(subject_name, version)
54
+ registry.subject_version(subject_name, version)
55
+ expect(upstream).to have_received(:subject_version).exactly(1).times
56
+ end
57
+ end
58
+
37
59
  it_behaves_like "a confluent schema registry client" do
38
60
  let(:upstream) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
39
61
  let(:registry) { described_class.new(upstream) }
@@ -0,0 +1,112 @@
1
+ require 'webmock/rspec'
2
+ require 'avro_turf/cached_confluent_schema_registry'
3
+ require 'avro_turf/test/fake_confluent_schema_registry_server'
4
+
5
+ describe AvroTurf::CachedConfluentSchemaRegistry do
6
+ let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
7
+ let(:cache) { AvroTurf::DiskCache.new("spec/cache")}
8
+ let(:registry) { described_class.new(upstream, cache: cache) }
9
+ let(:id) { rand(999) }
10
+ let(:schema) do
11
+ {
12
+ type: "record",
13
+ name: "person",
14
+ fields: [{ name: "name", type: "string" }]
15
+ }.to_json
16
+ end
17
+
18
+ let(:city_id) { rand(999) }
19
+ let(:city_schema) do
20
+ {
21
+ type: "record",
22
+ name: "city",
23
+ fields: [{ name: "name", type: "string" }]
24
+ }.to_json
25
+ end
26
+
27
+ before do
28
+ FileUtils.mkdir_p("spec/cache")
29
+ end
30
+
31
+ describe "#fetch" do
32
+ let(:cache_before) do
33
+ {
34
+ "#{id}" => "#{schema}"
35
+ }
36
+ end
37
+ let(:cache_after) do
38
+ {
39
+ "#{id}" => "#{schema}",
40
+ "#{city_id}" => "#{city_schema}"
41
+ }
42
+ end
43
+
44
+ # setup the disk cache to avoid performing the upstream fetch
45
+ before do
46
+ store_cache("schemas_by_id.json", cache_before)
47
+ end
48
+
49
+ it "uses preloaded disk cache" do
50
+ # multiple calls return same result, with zero upstream calls
51
+ allow(upstream).to receive(:fetch).with(id).and_return(schema)
52
+ expect(registry.fetch(id)).to eq(schema)
53
+ expect(registry.fetch(id)).to eq(schema)
54
+ expect(upstream).to have_received(:fetch).exactly(0).times
55
+ expect(load_cache("schemas_by_id.json")).to eq cache_before
56
+ end
57
+
58
+ it "writes thru to disk cache" do
59
+ # multiple calls return same result, with only one upstream call
60
+ allow(upstream).to receive(:fetch).with(city_id).and_return(city_schema)
61
+ expect(registry.fetch(city_id)).to eq(city_schema)
62
+ expect(registry.fetch(city_id)).to eq(city_schema)
63
+ expect(upstream).to have_received(:fetch).exactly(1).times
64
+ expect(load_cache("schemas_by_id.json")).to eq cache_after
65
+ end
66
+ end
67
+
68
+ describe "#register" do
69
+ let(:subject_name) { "a_subject" }
70
+ let(:cache_before) do
71
+ {
72
+ "#{subject_name}#{schema}" => id
73
+ }
74
+ end
75
+
76
+ let(:city_name) { "a_city" }
77
+ let(:cache_after) do
78
+ {
79
+ "#{subject_name}#{schema}" => id,
80
+ "#{city_name}#{city_schema}" => city_id
81
+ }
82
+ end
83
+
84
+ # setup the disk cache to avoid performing the upstream register
85
+ before do
86
+ store_cache("ids_by_schema.json", cache_before)
87
+ end
88
+
89
+ it "uses preloaded disk cache" do
90
+ # multiple calls return same result, with zero upstream calls
91
+ allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
92
+ expect(registry.register(subject_name, schema)).to eq(id)
93
+ expect(registry.register(subject_name, schema)).to eq(id)
94
+ expect(upstream).to have_received(:register).exactly(0).times
95
+ expect(load_cache("ids_by_schema.json")).to eq cache_before
96
+ end
97
+
98
+ it "writes thru to disk cache" do
99
+ # multiple calls return same result, with only one upstream call
100
+ allow(upstream).to receive(:register).with(city_name, city_schema).and_return(city_id)
101
+ expect(registry.register(city_name, city_schema)).to eq(city_id)
102
+ expect(registry.register(city_name, city_schema)).to eq(city_id)
103
+ expect(upstream).to have_received(:register).exactly(1).times
104
+ expect(load_cache("ids_by_schema.json")).to eq cache_after
105
+ end
106
+ end
107
+
108
+ it_behaves_like "a confluent schema registry client" do
109
+ let(:upstream) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
110
+ let(:registry) { described_class.new(upstream) }
111
+ end
112
+ end
@@ -15,18 +15,8 @@ describe AvroTurf::Messaging do
15
15
  }
16
16
 
17
17
  let(:message) { { "full_name" => "John Doe" } }
18
-
19
- before do
20
- FileUtils.mkdir_p("spec/schemas")
21
- end
22
-
23
- before do
24
- stub_request(:any, /^#{registry_url}/).to_rack(FakeConfluentSchemaRegistryServer)
25
- FakeConfluentSchemaRegistryServer.clear
26
- end
27
-
28
- before do
29
- define_schema "person.avsc", <<-AVSC
18
+ let(:schema_json) do
19
+ <<-AVSC
30
20
  {
31
21
  "name": "person",
32
22
  "type": "record",
@@ -40,7 +30,20 @@ describe AvroTurf::Messaging do
40
30
  AVSC
41
31
  end
42
32
 
43
- shared_examples_for "encoding and decoding" do
33
+ before do
34
+ FileUtils.mkdir_p("spec/schemas")
35
+ end
36
+
37
+ before do
38
+ stub_request(:any, /^#{registry_url}/).to_rack(FakeConfluentSchemaRegistryServer)
39
+ FakeConfluentSchemaRegistryServer.clear
40
+ end
41
+
42
+ before do
43
+ define_schema "person.avsc", schema_json
44
+ end
45
+
46
+ shared_examples_for "encoding and decoding with the schema from schema store" do
44
47
  it "encodes and decodes messages" do
45
48
  data = avro.encode(message, schema_name: "person")
46
49
  expect(avro.decode(data)).to eq message
@@ -60,7 +63,39 @@ describe AvroTurf::Messaging do
60
63
  end
61
64
  end
62
65
 
63
- it_behaves_like "encoding and decoding"
66
+ shared_examples_for 'encoding and decoding with the schema from registry' do
67
+ before do
68
+ registry = AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger)
69
+ registry.register('person', Avro::Schema.parse(schema_json))
70
+ registry.register('people', Avro::Schema.parse(schema_json))
71
+ end
72
+
73
+ it 'encodes and decodes messages' do
74
+ data = avro.encode(message, subject: 'person', version: 1)
75
+ expect(avro.decode(data)).to eq message
76
+ end
77
+
78
+ it "allows specifying a reader's schema by subject and version" do
79
+ data = avro.encode(message, subject: 'person', version: 1)
80
+ expect(avro.decode(data, schema_name: 'person')).to eq message
81
+ end
82
+
83
+ it 'raises AvroTurf::SchemaNotFoundError when the schema does not exist on registry' do
84
+ expect { avro.encode(message, subject: 'missing', version: 1) }.to raise_error(AvroTurf::SchemaNotFoundError)
85
+ end
86
+
87
+ it 'caches parsed schemas for decoding' do
88
+ data = avro.encode(message, subject: 'person', version: 1)
89
+ avro.decode(data)
90
+ allow(Avro::Schema).to receive(:parse).and_call_original
91
+ expect(avro.decode(data)).to eq message
92
+ expect(Avro::Schema).not_to have_received(:parse)
93
+ end
94
+ end
95
+
96
+ it_behaves_like "encoding and decoding with the schema from schema store"
97
+
98
+ it_behaves_like 'encoding and decoding with the schema from registry'
64
99
 
65
100
  context "with a provided registry" do
66
101
  let(:registry) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
@@ -73,7 +108,9 @@ describe AvroTurf::Messaging do
73
108
  )
74
109
  end
75
110
 
76
- it_behaves_like "encoding and decoding"
111
+ it_behaves_like "encoding and decoding with the schema from schema store"
112
+
113
+ it_behaves_like 'encoding and decoding with the schema from registry'
77
114
 
78
115
  it "uses the provided registry" do
79
116
  allow(registry).to receive(:register).and_call_original
@@ -101,7 +138,7 @@ describe AvroTurf::Messaging do
101
138
  )
102
139
  end
103
140
 
104
- it_behaves_like "encoding and decoding"
141
+ it_behaves_like "encoding and decoding with the schema from schema store"
105
142
 
106
143
  it "uses the provided schema store" do
107
144
  allow(schema_store).to receive(:find).and_call_original
@@ -197,6 +197,42 @@ describe AvroTurf::SchemaStore do
197
197
  schema = store.find("person")
198
198
  expect(schema.fullname).to eq "person"
199
199
  end
200
+
201
+ it "is thread safe" do
202
+ define_schema "address.avsc", <<-AVSC
203
+ {
204
+ "type": "record",
205
+ "name": "address",
206
+ "fields": []
207
+ }
208
+ AVSC
209
+
210
+ # Set a Thread breakpoint right in the core place of race condition
211
+ expect(Avro::Name)
212
+ .to receive(:add_name)
213
+ .and_wrap_original { |m, *args|
214
+ Thread.stop
215
+ m.call(*args)
216
+ }
217
+
218
+ # Run two concurrent threads which will both trigger the same schema loading
219
+ threads = 2.times.map { Thread.new { store.find("address") } }
220
+ # Wait for the moment when both threads will reach the breakpoint
221
+ sleep 0.001 until threads.all?(&:stop?)
222
+
223
+ expect {
224
+ # Resume the threads' evaluation, one after another
225
+ threads.each do |thread|
226
+ next unless thread.status == 'sleep'
227
+
228
+ thread.run
229
+ sleep 0.001 until thread.stop?
230
+ end
231
+
232
+ # Ensure that threads are finished
233
+ threads.each(&:join)
234
+ }.to_not raise_error
235
+ end
200
236
  end
201
237
 
202
238
  describe "#load_schemas!" do
@@ -12,6 +12,14 @@ module Helpers
12
12
  f.write(content)
13
13
  end
14
14
  end
15
+
16
+ def store_cache(path, hash)
17
+ File.write(File.join("spec/cache", path), JSON.generate(hash))
18
+ end
19
+
20
+ def load_cache(path)
21
+ JSON.parse(File.read(File.join("spec/cache", path)))
22
+ end
15
23
  end
16
24
 
17
25
  RSpec.configure do |config|
@@ -88,16 +88,18 @@ shared_examples_for "a confluent schema registry client" do
88
88
  end
89
89
 
90
90
  describe "#subject_version" do
91
- before do
92
- 2.times do |n|
93
- registry.register(subject_name,
94
- { type: :record, name: "r#{n}", fields: [] }.to_json)
95
- end
91
+ let!(:schema_id1) do
92
+ registry.register(subject_name, { type: :record, name: "r0", fields: [] }.to_json)
93
+ end
94
+ let!(:schema_id2) do
95
+ registry.register(subject_name, { type: :record, name: "r1", fields: [] }.to_json)
96
96
  end
97
+
97
98
  let(:expected) do
98
99
  {
99
100
  name: subject_name,
100
101
  version: 1,
102
+ id: schema_id1,
101
103
  schema: { type: :record, name: "r0", fields: [] }.to_json
102
104
  }.to_json
103
105
  end
@@ -112,6 +114,7 @@ shared_examples_for "a confluent schema registry client" do
112
114
  {
113
115
  name: subject_name,
114
116
  version: 2,
117
+ id: schema_id2,
115
118
  schema: { type: :record, name: "r1", fields: [] }.to_json
116
119
  }.to_json
117
120
  end
@@ -0,0 +1,40 @@
1
+ require 'rack/test'
2
+ require 'avro_turf/test/fake_confluent_schema_registry_server'
3
+
4
+ describe FakeConfluentSchemaRegistryServer do
5
+ include Rack::Test::Methods
6
+
7
+ def app; described_class; end
8
+
9
+ let(:schema) do
10
+ {
11
+ type: "record",
12
+ name: "person",
13
+ fields: [
14
+ { name: "name", type: "string" }
15
+ ]
16
+ }.to_json
17
+ end
18
+
19
+ describe 'POST /subjects/:subject/versions' do
20
+ it 'returns the same schema ID when invoked with same schema and same subject' do
21
+ post '/subjects/person/versions', { schema: schema }.to_json, 'CONTENT_TYPE' => 'application/vnd.schemaregistry+json'
22
+
23
+ expected_id = JSON.parse(last_response.body).fetch('id')
24
+
25
+ post '/subjects/person/versions', { schema: schema }.to_json, 'CONTENT_TYPE' => 'application/vnd.schemaregistry+json'
26
+
27
+ expect(JSON.parse(last_response.body).fetch('id')).to eq expected_id
28
+ end
29
+
30
+ it 'returns a different schema ID when invoked with same schema and different subject' do
31
+ post '/subjects/person/versions', { schema: schema }.to_json, 'CONTENT_TYPE' => 'application/vnd.schemaregistry+json'
32
+
33
+ original_id = JSON.parse(last_response.body).fetch('id')
34
+
35
+ post '/subjects/happy-person/versions', { schema: schema }.to_json, 'CONTENT_TYPE' => 'application/vnd.schemaregistry+json'
36
+
37
+ expect(JSON.parse(last_response.body).fetch('id')).not_to eq original_id
38
+ end
39
+ end
40
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro_turf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-15 00:00:00.000000000 Z
11
+ date: 2019-07-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: avro
@@ -19,7 +19,7 @@ dependencies:
19
19
  version: 1.7.7
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: '1.9'
22
+ version: '1.10'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,7 +29,7 @@ dependencies:
29
29
  version: 1.7.7
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: '1.9'
32
+ version: '1.10'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: excon
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -142,6 +142,20 @@ dependencies:
142
142
  - - ">="
143
143
  - !ruby/object:Gem::Version
144
144
  version: '0'
145
+ - !ruby/object:Gem::Dependency
146
+ name: rack-test
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - ">="
150
+ - !ruby/object:Gem::Version
151
+ version: '0'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: '0'
145
159
  description:
146
160
  email:
147
161
  - dasch@zendesk.com
@@ -149,6 +163,7 @@ executables: []
149
163
  extensions: []
150
164
  extra_rdoc_files: []
151
165
  files:
166
+ - ".circleci/config.yml"
152
167
  - ".gitignore"
153
168
  - ".rspec"
154
169
  - CHANGELOG.md
@@ -157,7 +172,6 @@ files:
157
172
  - README.md
158
173
  - Rakefile
159
174
  - avro_turf.gemspec
160
- - circle.yml
161
175
  - lib/avro_turf.rb
162
176
  - lib/avro_turf/cached_confluent_schema_registry.rb
163
177
  - lib/avro_turf/cached_schema_registry.rb
@@ -173,6 +187,8 @@ files:
173
187
  - lib/avro_turf/core_ext/symbol.rb
174
188
  - lib/avro_turf/core_ext/time.rb
175
189
  - lib/avro_turf/core_ext/true_class.rb
190
+ - lib/avro_turf/disk_cache.rb
191
+ - lib/avro_turf/in_memory_cache.rb
176
192
  - lib/avro_turf/messaging.rb
177
193
  - lib/avro_turf/mutable_schema_store.rb
178
194
  - lib/avro_turf/schema_registry.rb
@@ -198,11 +214,13 @@ files:
198
214
  - spec/core_ext/symbol_spec.rb
199
215
  - spec/core_ext/time_spec.rb
200
216
  - spec/core_ext/true_class_spec.rb
217
+ - spec/disk_cached_confluent_schema_registry_spec.rb
201
218
  - spec/messaging_spec.rb
202
219
  - spec/schema_store_spec.rb
203
220
  - spec/schema_to_avro_patch_spec.rb
204
221
  - spec/spec_helper.rb
205
222
  - spec/support/confluent_schema_registry_context.rb
223
+ - spec/test/fake_confluent_schema_registry_server_spec.rb
206
224
  homepage: https://github.com/dasch/avro_turf
207
225
  licenses:
208
226
  - MIT
@@ -230,8 +248,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
230
248
  - !ruby/object:Gem::Version
231
249
  version: '0'
232
250
  requirements: []
233
- rubyforge_project:
234
- rubygems_version: 2.7.6
251
+ rubygems_version: 3.0.3
235
252
  signing_key:
236
253
  specification_version: 4
237
254
  summary: A library that makes it easier to use the Avro serialization format from
@@ -250,8 +267,10 @@ test_files:
250
267
  - spec/core_ext/symbol_spec.rb
251
268
  - spec/core_ext/time_spec.rb
252
269
  - spec/core_ext/true_class_spec.rb
270
+ - spec/disk_cached_confluent_schema_registry_spec.rb
253
271
  - spec/messaging_spec.rb
254
272
  - spec/schema_store_spec.rb
255
273
  - spec/schema_to_avro_patch_spec.rb
256
274
  - spec/spec_helper.rb
257
275
  - spec/support/confluent_schema_registry_context.rb
276
+ - spec/test/fake_confluent_schema_registry_server_spec.rb
data/circle.yml DELETED
@@ -1,4 +0,0 @@
1
- machine:
2
- ruby:
3
- version: 2.2.0
4
- version: 2.0.0